From bffad33b743f00513ed37628db1449e632d02bce Mon Sep 17 00:00:00 2001 From: Adam Gibson Date: Thu, 5 Jan 2023 19:05:36 +0000 Subject: [PATCH] Tx size estimation allows varied in, out types Prior to this commit, p2wsh inputs from fidelity bonds resulted in miscalculation of transaction fees, even in cases where the exact set of inputs were known (such as a direct send). In this commit we change the estimation to a model in which the caller of jmbitcoin.secp256k1_transaction.estimate_tx_size must specify a list of types, one for each input to the transaction, and the same for outputs. In some cases, the caller of the function uses the default script type of the wallet, but in other cases where the caller can know the exact types of each utxo used as input, and each destination used as output, they are specified explicitly. In particular, the use of fidelity bond outputs as input to transactions can be accounted for. Currently this is only done in direct send payments; coinjoins still fall back to assuming all inputs the same type (note that it is not possible to use fidelity bond utxos as inputs to coinjoins). Note also that the burn destination calculation in direct send is removed, since it is not used, so the maintenance burden is best avoided. --- jmbitcoin/jmbitcoin/secp256k1_transaction.py | 143 +++++++++++-------- jmclient/jmclient/taker_utils.py | 62 ++++---- jmclient/jmclient/wallet.py | 44 +++++- jmclient/test/test_taker.py | 12 +- 4 files changed, 154 insertions(+), 107 deletions(-) diff --git a/jmbitcoin/jmbitcoin/secp256k1_transaction.py b/jmbitcoin/jmbitcoin/secp256k1_transaction.py index a42b364..3d4b33b 100644 --- a/jmbitcoin/jmbitcoin/secp256k1_transaction.py +++ b/jmbitcoin/jmbitcoin/secp256k1_transaction.py @@ -106,70 +106,89 @@ def human_readable_output(txoutput): pass # non standard script return outdict -def estimate_tx_size(ins, outs, txtype='p2pkh', outtype=None): +def there_is_one_segwit_input(x): + # note that we need separate input types for + # any distinct types of scripthash inputs supported, + # since each may have a different size of witness; in + # that case, the internal list in this list comprehension + # will need updating. + return any([y in ["p2sh-p2wpkh", "p2wpkh", "p2wsh"] for y in x]) + +def estimate_tx_size(ins, outs): '''Estimate transaction size. - The txtype field as detailed below is used to distinguish - the type, but there is at least one source of meaningful roughness: - we assume that the scriptPubKey type of all the outputs are the same as - the input, unless `outtype` is specified, in which case *one* of - the outputs is assumed to be that other type, with all of the other - outputs being of the same type as before. - This, combined with a few bytes variation in signature sizes means - we will sometimes see small inaccuracies in this estimate. - - Assuming p2pkh: - out: 8+1+3+20+2=34, in: 32+4+1+1+~72+1+33+4=148, - ver: 4, locktime:4, +2 (len in,out) - total = 34*len_out + 148*len_in + 10 (sig sizes vary slightly) - - Assuming p2sh M of N multisig: - "ins" must contain M, N so ins= (numins, M, N) (crude assuming all same) - 73*M + 34*N + 45 per input, so total ins ~ len_ins * (45+73M+34N) - so total ~ 32*len_out + (45+73M+34N)*len_in + 10 - - Assuming p2sh-p2wpkh: - witness are roughly 1+1+~72+1+33 for each input - (txid, vin, 4+20 for witness program encoded as scriptsig, 4 for sequence) - non-witness input fields are roughly 32+4+4+20+4=64, so total becomes - n_in * 64 + 4(ver) + 2(marker, flag) + 2(n_in, n_out) + 4(locktime) + n_out*32 - - Assuming p2wpkh native: - witness as previous case - non-witness loses the 24 witnessprogram, replaced with 1 zero, - in the scriptSig, so becomes: - 4 + 1 + 1 + (n_in) + (vin) + (n_out) + (vout) + (witness) + (locktime) - non-witness: 4(ver) +2 (marker, flag) + n_in*41 + 4(locktime) +2 (len in, out) + n_out*31 - witness: 1 + 1 + 72 + 1 + 33 + Both arguments `ins` and `outs` must be lists of script types, + and they must be present in the keys of the dicts `inmults`, + `outmults` defined here. + Note that variation in ECDSA signature sizes means + we will sometimes see small inaccuracies in this estimate, but + that this is ameliorated by the existence of the witness discount, + in actually estimating fees. + The value '72' is used for the most-likely size of these ECDSA + signatures, due to 30[1 byte] + len(rest)[1 byte] + type:02 [1 byte] + len(r)[1] + r[32 or 33] + type:02[1] + len(s)[1] + s[32] + sighash_all [1] + ... though as can be seen, 71 is also likely: + r length 33 occurs when the value is 'negative' (>N/2) and a byte x80 is prepended, + but shorter values for r are possible if rare. + Returns: + Either a single integer, if the transaction will be non-segwit, + or a tuple (int, int) for witness and non-witness bytes respectively). ''' - if txtype == 'p2pkh': - return 4 + 4 + 2 + ins*148 + 34*outs + ( - OUTPUT_EXTRA_BYTES[txtype][outtype] - if outtype and outtype in OUTPUT_EXTRA_BYTES[txtype] else 0) - elif txtype == 'p2sh-p2wpkh': - #return the estimate for the witness and non-witness - #portions of the transaction, assuming that all the inputs - #are of segwit type p2sh-p2wpkh - # Note as of Jan19: this misses 2 bytes (trivial) for len in, out - # and also overestimates output size by 2 bytes. - witness_estimate = ins*108 - non_witness_estimate = 4 + 4 + 4 + outs*32 + ins*64 + ( - OUTPUT_EXTRA_BYTES[txtype][outtype] - if outtype and outtype in OUTPUT_EXTRA_BYTES[txtype] else 0) - return (witness_estimate, non_witness_estimate) - elif txtype == 'p2wpkh': - witness_estimate = ins*108 - non_witness_estimate = 4 + 4 + 4 + outs*31 + ins*41 + ( - OUTPUT_EXTRA_BYTES[txtype][outtype] - if outtype and outtype in OUTPUT_EXTRA_BYTES[txtype] else 0) - return (witness_estimate, non_witness_estimate) - elif txtype == 'p2shMofN': - ins, M, N = ins - return 4 + 4 + 2 + (45 + 73*M + 34*N)*ins + outs*32 + ( - OUTPUT_EXTRA_BYTES['p2sh-p2wpkh'][outtype] - if outtype and outtype in OUTPUT_EXTRA_BYTES['p2sh-p2wpkh'] else 0) - else: - raise NotImplementedError("Transaction size estimation not" + - "yet implemented for type: " + txtype) + + # All non-witness input sizes include: txid, index, sequence, + # which is 32, 4 and 4; the remaining is scriptSig which is 1 + # at minimum, for native segwit (the byte x00). Hence 41 is the minimum. + # The witness field for p2wpkh consists of sig, pub so 72 + 33 + 1 byte + # for the number of witness elements and 2 bytes for the size of each element, + # hence 108. + # For p2pkh, 148 comes from 32+4+1+1+~72+1+33+4 + # For p2sh-p2wpkh there is an additional 23 bytes of witness for the redeemscript. + # + # Note that p2wsh here is specific to the script + # we use for fidelity bonds; 43 is the bytes required for that + # script's redeemscript field in the witness, but for arbitrary scripts, + # the witness portion could be any other size. + # Hence, we may need to modify this later. + inmults = {"p2wsh": {"w": 1 + 72 + 43, "nw": 41}, + "p2wpkh": {"w": 108, "nw": 41}, + "p2sh-p2wpkh": {"w": 108, "nw": 64}, + "p2pkh": {"w": 0, "nw": 148}} + + # Notes: in outputs, there is only 1 'scripthash' + # type for either segwit/nonsegwit. + # p2wsh has structure 8 bytes output, then: + # x22,x00,x20,(32 byte hash), so 32 + 3 + 8 + # note also there is no need to distinguish witness + # here, outputs are always entirely nonwitness. + outmults = {"p2wsh": 43, + "p2wpkh": 31, + "p2sh-p2wpkh": 64, + "p2pkh": 34} + + # nVersion, nLockTime, nins, nouts: + nwsize = 4 + 4 + 2 + wsize = 0 + tx_is_segwit = there_is_one_segwit_input(ins) + if tx_is_segwit: + # flag and marker bytes + nwsize += 2 + + for i in ins: + if i not in inmults: + raise NotImplementedError( + "Script type not supported for transaction size " + "estimation: {}".format(i)) + inmult = inmults[i] + nwsize += inmult["nw"] + wsize += inmult["w"] + for o in outs: + if o not in outmults: + raise NotImplementedError( + "Script type not supported for transaction size " + "estimation: {}".format(o)) + nwsize += outmults[o] + + if not tx_is_segwit: + return nwsize + return (wsize, nwsize) def pubkey_to_p2pkh_script(pub, require_compressed=False): """ diff --git a/jmclient/jmclient/taker_utils.py b/jmclient/jmclient/taker_utils.py index a89fc60..97e4547 100644 --- a/jmclient/jmclient/taker_utils.py +++ b/jmclient/jmclient/taker_utils.py @@ -10,9 +10,9 @@ from .schedule import human_readable_schedule_entry, tweak_tumble_schedule,\ schedule_to_text from .wallet import BaseWallet, estimate_tx_fee, compute_tx_locktime, \ FidelityBondMixin -from jmbitcoin import make_shuffled_tx, amount_to_str, mk_burn_script,\ +from jmbitcoin import make_shuffled_tx, amount_to_str, \ PartiallySignedTransaction, CMutableTxOut,\ - human_readable_transaction, Hash160 + human_readable_transaction from jmbase.support import EXIT_SUCCESS log = get_log() @@ -21,6 +21,15 @@ Utility functions for tumbler-style takers; Currently re-used by CLI script tumbler.py and joinmarket-qt """ +def get_utxo_scripts(wallet: BaseWallet, utxos): + # given a Joinmarket wallet and a set of utxos + # as passed from `get_utxos_by_mixdepth` at one mixdepth, + # return the list of script types for each utxo + script_types = [] + for k, v in utxos.items(): + script_types.append(wallet.get_outtype(v["address"])) + return script_types + def direct_send(wallet_service, amount, mixdepth, destination, answeryes=False, accept_callback=None, info_callback=None, error_callback=None, return_transaction=False, with_final_psbt=False, @@ -97,38 +106,27 @@ def direct_send(wallet_service, amount, mixdepth, destination, answeryes=False, "There are no available utxos in mixdepth: " + str(mixdepth) + ", quitting.") return total_inputs_val = sum([va['value'] for u, va in utxos.items()]) - - if is_burn_destination(destination): - if len(utxos) > 1: - log.error("Only one input allowed when burning coins, to keep " - + "the tx small. Tip: use the coin control feature to freeze utxos") - return - address_type = FidelityBondMixin.BIP32_BURN_ID - index = wallet_service.wallet.get_next_unused_index(mixdepth, address_type) - path = wallet_service.wallet.get_path(mixdepth, address_type, index) - privkey, engine = wallet_service.wallet._get_key_from_path(path) - pubkey = engine.privkey_to_pubkey(privkey) - pubkeyhash = Hash160(pubkey) - - #size of burn output is slightly different from regular outputs - burn_script = mk_burn_script(pubkeyhash) - fee_est = estimate_tx_fee(len(utxos), 0, txtype=txtype, extra_bytes=len(burn_script)/2) - - outs = [{"script": burn_script, "value": total_inputs_val - fee_est}] - destination = "BURNER OUTPUT embedding pubkey at " \ - + wallet_service.wallet.get_path_repr(path) \ - + "\n\nWARNING: This transaction if broadcasted will PERMANENTLY DESTROY your bitcoins\n" - else: - #regular sweep (non-burn) - fee_est = estimate_tx_fee(len(utxos), 1, txtype=txtype, outtype=outtype) - outs = [{"address": destination, "value": total_inputs_val - fee_est}] + script_types = get_utxo_scripts(wallet_service.wallet, utxos) + fee_est = estimate_tx_fee(len(utxos), 1, txtype=script_types, outtype=outtype) + outs = [{"address": destination, "value": total_inputs_val - fee_est}] else: - #not doing a sweep; we will have change - #8 inputs to be conservative - initial_fee_est = estimate_tx_fee(8,2, txtype=txtype, outtype=outtype) - utxos = wallet_service.select_utxos(mixdepth, amount + initial_fee_est) + change_type = wallet_service.get_txtype() + if custom_change_addr: + change_type = wallet_service.get_outtype(custom_change_addr) + if change_type is None: + # we don't recognize this type; best we can do is revert to default, + # even though it may be inaccurate: + change_type = wallet_service.get_txtype() + outtypes = [change_type, outtype] + # not doing a sweep; we will have change. + # 8 inputs to be conservative; note we cannot account for the possibility + # of non-standard input types at this point. + initial_fee_est = estimate_tx_fee(8,2, txtype=txtype, outtype=outtypes) + utxos = wallet_service.select_utxos(mixdepth, amount + initial_fee_est, + includeaddr=True) + script_types = get_utxo_scripts(wallet_service.wallet, utxos) if len(utxos) < 8: - fee_est = estimate_tx_fee(len(utxos), 2, txtype=txtype, outtype=outtype) + fee_est = estimate_tx_fee(len(utxos), 2, txtype=script_types, outtype=outtypes) else: fee_est = initial_fee_est total_inputs_val = sum([va['value'] for u, va in utxos.items()]) diff --git a/jmclient/jmclient/wallet.py b/jmclient/jmclient/wallet.py index 2f373fb..237f5f7 100644 --- a/jmclient/jmclient/wallet.py +++ b/jmclient/jmclient/wallet.py @@ -57,6 +57,21 @@ def estimate_tx_fee(ins, outs, txtype='p2pkh', outtype=None, extra_bytes=0): '''Returns an estimate of the number of satoshis required for a transaction with the given number of inputs and outputs, based on information from the blockchain interface. + + Arguments: + ins: int, number of inputs + outs: int, number of outputs + txtype: either a single string, or a list of strings + outtype: either None or a list of strings + extra_bytes: an int + These arguments are intended to allow a kind of 'default', where + all the inputs and outputs match a predefined type (that of the wallet), + but also allow customization for heterogeneous input and output types. + For supported input and output types, see the keys of the dicts + `inmults` and `outmults` in jmbitcoin.secp256k1_transaction.estimate_tx_size`. + + Returns: + a single integer number of satoshis as estimate. ''' if jm_single().bc_interface is None: raise RuntimeError("Cannot estimate transaction fee " + @@ -73,18 +88,33 @@ def estimate_tx_fee(ins, outs, txtype='p2pkh', outtype=None, extra_bytes=0): btc.fee_per_kb_to_str(fee_per_kb) + " greater than absurd value " + btc.fee_per_kb_to_str(absurd_fee) + ", quitting.") - if txtype in ['p2pkh', 'p2shMofN']: - tx_estimated_bytes = btc.estimate_tx_size(ins, outs, txtype, outtype) + extra_bytes + + # See docstring for explanation: + if isinstance(txtype, str): + ins = [txtype]* ins + else: + assert isinstance(txtype, list) + ins = txtype + if outtype is None: + outs = [txtype] * outs + elif isinstance(outtype, str): + outs = [outtype] * outs + else: + assert isinstance(outtype, list) + outs = outtype + + # Note: the calls to `estimate_tx_size` in this code + # block can raise `NotImplementedError` if any of the + # strings in (ins, outs) are not known script types. + if not btc.there_is_one_segwit_input(ins): + tx_estimated_bytes = btc.estimate_tx_size(ins, outs) + extra_bytes return int((tx_estimated_bytes * fee_per_kb)/Decimal(1000.0)) - elif txtype in ['p2wpkh', 'p2sh-p2wpkh']: + else: witness_estimate, non_witness_estimate = btc.estimate_tx_size( - ins, outs, txtype, outtype) + ins, outs) non_witness_estimate += extra_bytes return int(int(( non_witness_estimate + 0.25*witness_estimate)*fee_per_kb)/Decimal(1000.0)) - else: - raise NotImplementedError("Txtype: " + txtype + " not implemented.") - def compute_tx_locktime(): # set locktime for best anonset (Core, Electrum) diff --git a/jmclient/test/test_taker.py b/jmclient/test/test_taker.py index 47db342..5b96b33 100644 --- a/jmclient/test/test_taker.py +++ b/jmclient/test/test_taker.py @@ -513,15 +513,15 @@ def test_custom_change(setup_taker): for out in taker.latest_tx.vout: # input utxo is 200M; amount is 20M; as per logs: # totalin=200000000 - # my_txfee=13050 + # my_txfee=13680 <- this estimate ignores address type # makers_txfee=3000 - # cjfee_total=12000 => changevalue=179974950 + # cjfee_total=12000 => changevalue=179974320 # note that there is a small variation in the size of # the transaction (a few bytes) for the different scriptPubKey - # type, but this is currently ignored by the Taker, who makes - # fee estimate purely based on the number of ins and outs; - # this will never be too far off anyway. - if out.scriptPubKey == script and out.nValue == 179974950: + # type, but this is currently ignored in coinjoins by the + # Taker (not true for direct send operations), hence we get + # the same value for each different output type. + if out.scriptPubKey == script and out.nValue == 179974320: # must be only one assert not custom_change_found custom_change_found = True