##########
# collector.conf -- MongoShake collector configuration.
# captured from a shell session: [work@10.10.10.10]$ cat collector.conf
#
# if you have any problem, please visit https://github.com/alibaba/MongoShake/wiki/FAQ
# for the detail explanation, please visit xxxx
# check the FAQ document and the wiki first when something goes wrong.

# current configuration version, do not modify.
conf.version = 2

# --------------------------- global configuration ---------------------------
# collector name
# the id is used when writing the pid file and similar output.
id = mongoshake

# high availability option.
# enable master election if set true. only one mongoshake can become master
# and do sync, the others will wait and at most one of them become master once
# previous master die. The master information stores in the `mongoshake` db in the source
# database by default.
# This option is useless when there is only one mongoshake running.
# enable it when a master/standby pair of mongoshake instances pulls from the same source.
master_quorum = false

# http api interface. Users can use this api to monitor mongoshake.
# `curl 127.0.0.1:9100`.
# We also provide a restful tool named "mongoshake-stat" to
# print ack, lsn, checkpoint and qps information based on this api.
# usage: `./mongoshake-stat --port=9100`
# restful monitoring ports of the full and the incremental stage; the internal
# metric statistics can be inspected with curl. see the wiki for details.
full_sync.http_port = 9101
incr_sync.http_port = 9100

# profiling on net/http/profile
# profiling port, used to inspect the internal go stacks.
system_profile_port = 9200

# global log level: debug, info, warning, error. lower level messages will be filtered
log.level = info

# log directory. log and pid file will be stored into this directory.
# if not set, default is "./logs/"
log.dir =

# log file name.
log.file = collector.log

# log flush enable. If set false, logs are buffered and may not all be printed on exit.
# If set true, every log entry is flushed immediately, which decreases performance
# considerably. true is recommended while debugging.
log.flush = false

# sync mode: all/full/incr. default is incr.
# all means full synchronization + incremental synchronization.
# full means full synchronization only.
# incr means incremental synchronization only.
# commented-out alternative value:
#sync_mode = incr
sync_mode = all

# connect source mongodb, set username and password if enable authority. Please note: password shouldn't contain '@'.
# split by comma(,) if use multiple instance in one replica-set. E.g., mongodb://username1:password1@primaryA,secondaryB,secondaryC
# split by semicolon(;) if sharding enable. E.g., mongodb://username1:password1@primaryA,secondaryB,secondaryC;mongodb://username2:password2@primaryX,secondaryY,secondaryZ
# the "username:password@" part can be omitted when no authentication is required.
# examples:
# replica set: mongodb://username1:password1@primaryA,secondaryB,secondaryC
# sharded cluster: mongodb://username1:password1@primaryA,secondaryB,secondaryC;mongodb://username2:password2@primaryX,secondaryY,secondaryZ
# NOTE(review): this value carries credentials in plain text -- restrict the file
# permissions and avoid publishing the file as-is.
mongo_urls = mongodb://igoodful:123456@10.10.10.11:27000,10.10.10.12:27000

# please fill the source config server url if source mongodb is sharding.
mongo_cs_url =

# please give one mongos address if using change stream to fetch data in incremental stage.
mongo_s_url =

# tunnel pipeline type. now we support rpc,file,kafka,mock,direct
tunnel = direct

# tunnel target resource url
# for rpc. this is remote receiver socket address
# for tcp. this is remote receiver socket address
# for file. this is the file path, for instance "data"
# for kafka. this is the topic and brokers address which split by comma, for
# instance: topic@brokers1,brokers2, default topic is "mongoshake"
# for mock. this is useless
# for direct. this is target mongodb address which format is the same as `mongo_urls`. If
# the target is sharding, this should be the mongos address.
# direct mode writes straight into MongoDB; the other modes are meant for analysis or
# long-distance transfer scenarios.
# note: with a non-direct tunnel the data has to be parsed by a receiver, see the FAQ document.
# the tunnel address is configured here, in the same format as mongo_urls.
# template:
#tunnel.address = mongodb://user:password@host:port
# NOTE(review): this value carries credentials in plain text -- restrict the file
# permissions and avoid publishing the file as-is.
tunnel.address = mongodb://igoodful:123456@10.10.10.21:27000

# the message format in the tunnel, only used when tunnel is kafka or file.
# "raw": the default. batched raw data format which has good performance, but it carries
# some control information, so it must be parsed by the receiver.
# "json": single oplog format by json, convenient for users to read directly.
# "bson": single oplog format by bson (binary).
tunnel.message = raw

# connect mode:
# primary: fetch data from primary.
# secondaryPreferred: fetch data from secondary if has, otherwise primary.(default, recommended)
# standalone: fetch data from given 1 node, no matter primary, secondary or hidden. This is only
# support when tunnel type is direct.
mongo_connect_mode = secondaryPreferred

# filter db or collection namespace. at most one of these two parameters can be given.
# if the filter.namespace.black is not empty, the given namespace will be
# filtered while others namespace passed.
# if the filter.namespace.white is not empty, the given namespace will be
# passed while others filtered.
# all the namespace will be passed if no condition given.
# db and collection connected by the dot(.).
# different namespaces are split by the semicolon(;).
# filter: filterDbName1.filterCollectionName1;filterDbName2
# regular expressions are not supported at present; each namespace can be a db or a db.collection.
#filter.namespace.black =
# entries are separated by a bare semicolon with no surrounding spaces, as in the format above.
filter.namespace.white = mktact.themis_template_field;mktact.themis_template_module;mktact.goods_allow_list

# some databases like "admin", "local", "mongoshake", "config", "system.views" are
# filtered, users can enable these database based on some special needs.
# different database are split by the semicolon(;).
# e.g., admin;mongoshake.
# pay attention: collection isn't support like "admin.xxx" except "system.views"
# setting this parameter is normally not recommended; it exists for very special
# scenarios in which databases such as admin or mongoshake have to be synchronized.
filter.pass.special.db = mktact

# only transfer oplog commands for syncing. represent
# by oplog.op are "i","d","u".
# DDL will be transferred if disable like create index, drop database,
# transaction in mongodb 4.0.
# whether DDL synchronization is enabled: true enables it. enabling is not supported
# yet when the source is sharding. if the destination is sharding, the applyOps
# command (including transactions) is not supported yet.
filter.ddl_enable = true

# checkpoint info, used in resuming from break point.
# checkpoint.storage.url is used to mark the checkpoint store database. E.g., mongodb://127.0.0.1:20070
# if not set, checkpoint will be written into source mongodb when source mongodb is replica-set(db=mongoshake),
# when source mongodb is sharding, the checkpoint will be written into config-server(db=admin)
checkpoint.storage.url =

# checkpoint db's name.
checkpoint.storage.db = mongoshake

# checkpoint collection's name.
# change this name when several mongoshake instances pull from the same source, to avoid conflicts.
checkpoint.storage.collection = ckpt_default

# real checkpoint: the fetching oplog position.
# pay attention: this is UTC time, which is 8 hours behind CST (China Standard Time, UTC+8).
# this variable will only be used when checkpoint is not exist: if a checkpoint already
# exists in the storage location configured above, this value has no effect. to force
# fetching from this position, delete the existing checkpoint first, see the FAQ.
# if no checkpoint exists and the value is 1970-01-01T00:00:00Z, all oplogs currently
# present on the source are fetched.
# if no checkpoint exists and the value is not 1970-01-01T00:00:00Z, the oldest oplog
# timestamp on the source is checked first; if it is newer than the given time,
# mongoshake reports an error and exits.
checkpoint.start_position = 1970-01-01T00:00:00Z

# transform from source db or collection namespace to dest db or collection namespace.
# at most one of these two parameters can be given.
# transform: fromDbName1.fromCollectionName1:toDbName1.toCollectionName1;fromDbName2:toDbName2
# namespace transformation, e.g. a.b becomes c.d after synchronization. enable with
# caution, it is fairly expensive in terms of performance.
# commented-out alternative mappings:
#transform.namespace = ucenter.award:award.award;ucenter.award_config:award.award_config;ucenter.award_order:award.award_order;ucenter.address_info:award.address_info
#transform.namespace = ucenter:award
#transform.namespace = ucenter.athena_user_task_v2:athena.athena_user_task_v3
# mapping of database and collection names; the destination database and collection
# names may need to be changed.
# entries are separated by a bare semicolon with no surrounding spaces, as in the format above.
transform.namespace = mktact.themis_template_field:bifrost.themis_template_field;mktact.themis_template_module:bifrost.themis_template_module;mktact.goods_allow_list:bifrost.goods_allow_list

# --------------------------- full sync configuration ---------------------------
# the number of collection concurrence
# maximum number of collections pulled concurrently, e.g. 6 means shake pulls at most
# 6 collections at the same time.
full_sync.reader.collection_parallel = 16

# the number of document writer thread in each collection.
# e.g. 8 means that 8 writer threads write concurrently for one collection.
full_sync.reader.write_document_parallel = 32

# number of documents in a batch insert in a document concurrence
# batch size written to the destination, e.g. 128 means one thread aggregates
# 128 documents and then writes them at once.
full_sync.reader.document_batch_size = 10240

# NOTE(review): this parameter is not described anywhere in this file -- confirm its
# meaning against the MongoShake documentation.
full_sync.reader.read_document_count = 0

# controls whether a collection of the same name that already exists in the dest
# mongodb is dropped before full synchronization.
# NOTE(review): going by the key name, true presumably means the existing collection
# is NOT dropped -- confirm against the MongoShake documentation.
full_sync.collection_exist_no_drop = true

# index creation once the data sync of the full sync stage has finished:
# none: do not create indexes.
# foreground: create foreground indexes.
# background: create background indexes.
full_sync.create_index = background

# convert insert to update when duplicate key found
# i.e. when the _id already exists in the destination.
full_sync.executor.insert_on_dup_update = true

# filter orphan document for source type is sharding.
full_sync.executor.filter.orphan_document = false

# enable majority write in full sync.
# the performance will degrade if enable.
full_sync.executor.majority_enable = false

# --------------------------- incremental sync configuration ---------------------------
# fetch method:
# oplog: fetch oplog from source mongodb (default)
# change_stream: use change to receive change event from source mongodb, support MongoDB >= 4.0
incr_sync.mongo_fetch_method = oplog

# global id. used in active-active replication.
# this parameter is not supported on current open-source version.
# gid is used in active-active setups to prevent circular replication; currently it is
# only used for MongoDB on Alibaba Cloud. if you synchronize between Alibaba Cloud
# instances and want gid enabled, contact Alibaba Cloud after-sales support or
# Zhuzhao (vinllen). for sharding with several gids, separate them with semicolons (;).
incr_sync.oplog.gids =

# distribute data to different worker by hash key to run in parallel.
# NOTE(review): the original text carried a stray fragment "rker = 8" at this point
# (possibly a truncated "worker = 8") -- confirm and remove.
# [auto] decide by if there has unique index in collections.
# use `collection` if has unique index otherwise use `id`.
# [id] shard by ObjectId. handle oplogs in sequence by unique _id
# [collection] shard by ns. handle oplogs in sequence by unique ns
# if there is no index, choosing id is recommended to reach very high synchronization
# performance; otherwise choose collection.
incr_sync.shard_key = auto

# oplog transmit worker concurrent
# if the source is sharding, worker number must equal to shard numbers.
# the number of workers can be raised if the machine is powerful enough.
incr_sync.worker = 128

# batched oplogs have block level checksum value using
# crc32 algorithm. and compressor for compressing content
# of oplog entry.
# supported compressor are : gzip,zlib,deflate
# Do not enable this option when tunnel type is "direct"
# with a non-direct tunnel, compression can be chosen to reduce network bandwidth usage.
incr_sync.worker.oplog_compressor = none

# memory queue configuration, plz visit FAQ document to see more details.
# do not modify these variables if the performance and resource usage can
# meet your needs.
incr_sync.worker.batch_queue_size = 64
incr_sync.adaptive.batching_max_size = 1024
incr_sync.fetcher.buffer_capacity = 256

# --- direct tunnel only begin ---
# if tunnel type is direct, all the below variable should be set

# oplog changes to Insert while Update found non-exist (_id or unique-index)
incr_sync.executor.upsert = false

# oplog changes to Update while Insert found duplicated key (_id or unique-index)
incr_sync.executor.insert_on_dup_update = false

# db. write duplicated logs to mongoshake_conflict
# sdk. write duplicated logs to sdk.
# if a write runs into a conflict, record the conflicting document.
incr_sync.conflict_write_to = none

# enable majority write in incremental sync.
# the performance will degrade if enable.
incr_sync.executor.majority_enable = false
# --- direct tunnel only end ---
###################
##################