arg_needle_lib

class arg_needle_lib.ARG

Bases: pybind11_object

add_sample(self: arg_needle_lib_pybind.ARG, sample_name: str = '') → int

check_basic(self: arg_needle_lib_pybind.ARG, stringent: bool = True) → None: Basic checks after initializing an ARG

check_children(self: arg_needle_lib_pybind.ARG, stringent: bool = True) → None: Checks after populating children

check_roots(self: arg_needle_lib_pybind.ARG) → None: Checks after populating roots

get_id_of_closest_site(self: arg_needle_lib_pybind.ARG, position: float) → int

get_idx_of_first_mutation_left_of(self: arg_needle_lib_pybind.ARG, physical_pos: float, include_equal: bool = False, warn_out_of_range: bool = True) → int

get_idx_of_first_mutation_right_of(self: arg_needle_lib_pybind.ARG, physical_pos: float, include_equal: bool = False, warn_out_of_range: bool = True) → int

get_idx_of_mutation_closest_to(self: arg_needle_lib_pybind.ARG, physical_pos: float) → int

get_mutation_sites(self: arg_needle_lib_pybind.ARG) → Dict[float, arg_needle_lib_pybind.Site]

get_site(self: arg_needle_lib_pybind.ARG, site_id: int) → float

get_sites(self: arg_needle_lib_pybind.ARG) → List[float]

is_leaf(self: arg_needle_lib_pybind.ARG, node_id: int) → bool

lowest_mutated_edge(self: arg_needle_lib_pybind.ARG, arg0: int, arg1: float) → arg_needle_lib_pybind.ARGEdge

lowest_mutated_edge_by_site(self: arg_needle_lib_pybind.ARG, arg0: int, arg1: int) → arg_needle_lib_pybind.ARGEdge

mrca(self: arg_needle_lib_pybind.ARG, ID1: int, ID2: int, position: float) → arg_needle_lib_pybind.ARGNode

mutations(self: arg_needle_lib_pybind.ARG) → List[arg_needle_lib_pybind.Mutation]: Return list of all Mutation objects

node(self: arg_needle_lib_pybind.ARG, ID: int) → arg_needle_lib_pybind.ARGNode: Get node at ID

node_ids(self: arg_needle_lib_pybind.ARG) → List[int]: Return list of all node IDs

num_edges(self: arg_needle_lib_pybind.ARG) → int

num_mutations(self: arg_needle_lib_pybind.ARG) → int

num_nodes(self: arg_needle_lib_pybind.ARG) → int

num_samples(self: arg_needle_lib_pybind.ARG) → int: Return number of samples

num_sites(self: arg_needle_lib_pybind.ARG) → int

populate_children_and_roots(self: arg_needle_lib_pybind.ARG) → None

populate_mutations_on_edges(self: arg_needle_lib_pybind.ARG) → None

root_at(self: arg_needle_lib_pybind.ARG, position: float) → arg_needle_lib_pybind.Root

root_starts(self: arg_needle_lib_pybind.ARG) → List[float]

set_chromosome(self: arg_needle_lib_pybind.ARG, chromosome: int) → None

set_offset(self: arg_needle_lib_pybind.ARG, offset: int) → None

set_sites(self: arg_needle_lib_pybind.ARG, positions: List[float]) → None

thread_sample(self: arg_needle_lib_pybind.ARG, section_starts: List[float], sample_ids: List[int], heights: List[float]) → None

property chromosome

property end

property leaf_ids

property offset

property reserved_samples

property sample_names

property start

property threaded_samples

class arg_needle_lib.ARGEdge

Bases: pybind11_object

mutations(self: arg_needle_lib_pybind.ARGEdge) → List[Mutation]: Return list of all mutation objects

property child

property end

property parent

property start

class arg_needle_lib.ARGNode

Bases: pybind11_object

add_parent(self: arg_needle_lib_pybind.ARGNode, start: float, end: float, parent: arg_needle_lib_pybind.ARGNode) → ARGEdge

parent_edge_at(self: arg_needle_lib_pybind.ARGNode, position: float) → ARGEdge

parent_edges(self: arg_needle_lib_pybind.ARGNode) → List[ARGEdge]: Returns a sorted list of parent edges

parent_starts(self: arg_needle_lib_pybind.ARGNode) → List[float]: Returns a sorted list of parent starts

remove_parent(self: arg_needle_lib_pybind.ARGNode, start: float) → None

update_parent_end(self: arg_needle_lib_pybind.ARGNode, start: float, end_new: float) → None

update_parent_start(self: arg_needle_lib_pybind.ARGNode, start_old: float, start_new: float) → None

property ID

property end

property height

property start

class arg_needle_lib.DescendantList

Bases: pybind11_object

static print_threshold() → None: Print threshold

static set_threshold(threshold: int) → None: Set threshold

class arg_needle_lib.Mutation

Bases: pybind11_object

property edge

property height

property position

property site_id

class arg_needle_lib.Root

Bases: pybind11_object

property end

property node

property start

class arg_needle_lib.Site

Bases: pybind11_object

get_mutations(self: arg_needle_lib_pybind.Site) → List[arg_needle_lib_pybind.Mutation]

get_position(self: arg_needle_lib_pybind.Site) → float

arg_needle_lib.arg_to_newick(arg: arg_needle_lib_pybind.ARG, verbose: bool = False) → str: Return a Newick representation of an ARG, verbose includes branch lengths

arg_needle_lib.arg_to_tskit(arg, batch_size=None, mutations=True, sample_permutation=None)

Convert arg_needle_lib ARG object to tskit.TreeSequence using tables

For this to work properly, the node IDs in the ARG object must all be consecutive.

Parameters:

arg – arg_needle_lib.ARG object to convert
batch_size (int) – Maximum allowed size for edge list information mainly used to save memory (default is no batching)
mutations (bool) – If True, converts mutations as well (default=True)
sample_permutation – If not None, then should be a permutation of values from 0 to num_samples - 1, either as a numpy array or Python list. Sample i in the resulting tskit.TreeSequence will correspond to sample sample_permutation[i] in the arg_needle_lib.ARG.

Returns:

Converted tskit.TreeSequence object.

Raises:

ValueError – Error in ARG format, batch_size parameter, or sample_permutation.

arg_needle_lib.association_diploid_all(arg: arg_needle_lib_pybind.ARG, phenotype: List[float], use_sample: List[bool], file_root: str, chromosome: int = 1, snp_prefix: str = '', min_maf: float = -1, max_maf: float = -1, write_bitset_threshold: float = -1, calibration_factor: float = 1, concise_pvalue: bool = True, max_only: bool = False, careful: bool = False) → float

ARG diploid association testing all clades.

Writes summary statistics to [file_root].tab.gz and returns the max chi2. Additionally, can write significant variants in [file_root].haps.gz if write_bitset_threshold is passed.

Parameters:

arg – arg_needle_lib.ARG object with 2*n leaves
phenotype – a length n array of phenotypes in the same order as the ARG samples. Missing values are specified using the use_sample array and can be filled with any value in the phenotype.
use_sample – a length n array of booleans, False means the phenotype is missing
file_root – results are written to [file_root].tab.gz and [file_root].haps.gz
chromosome – used for writing summary statistics
snp_prefix – prefix used for writing summary statistics
min_maf – minimum MAF to test (default=-1 which means no minimum)
max_maf – maximum MAF to test (default=-1 which means no maximum)
write_bitset_threshold – variants with p-value less than the threshold are written to [file_root].haps.gz (default=-1, no writing)
calibration_factor – used in ARG-MLMA workflows, can be set to 1 to instead run linear regression (default=1)
concise_pvalue – return only two significant digits (default=True)
max_only – if True, does not write to file and only returns the max chi2 (default=False)
careful – if True, computes a residual sum of squares from the inferred beta as part of the standard error. If False, uses the norm of the phenotype as an approximation, like BOLT. The careful version will give slightly more significant p-values, especially for significant variants. Defaults to False.

Returns:

The maximum chi2 across the tests.

arg_needle_lib.association_diploid_mutation(arg: arg_needle_lib_pybind.ARG, phenotype: List[float], use_sample: List[bool], file_root: str, mus: List[float], random_seed: int = 0, chromosome: int = 1, snp_prefix: str = '', min_maf: float = -1, max_maf: float = -1, write_bitset_threshold: float = -1, calibration_factor: float = 1, concise_pvalue: bool = True, max_only: bool = False, careful: bool = False) → List[float]

ARG diploid association testing using mutation-based sampling.

Writes summary statistics to [file_root].tab.gz and returns the max chi2 for each mutation rate passed. Additionally, can write significant variants in [file_root].haps.gz if write_bitset_threshold is passed.

One of the columns written with this option is MU_STAR. For each mutation tested, the MU_STAR value says “if you instead performed testing with a smaller mutation rate, you would sample this mutation if mu > MU_STAR”. This allows one to filter the tab.gz output for more stringent mutation rates.

Parameters:

arg – arg_needle_lib.ARG object with 2*n leaves
phenotype – a length n array of phenotypes in the same order as the ARG samples. Missing values are specified using the use_sample array and can be filled with any value in the phenotype.
use_sample – a length n array of booleans, False means the phenotype is missing
file_root – results are written to [file_root].tab.gz and [file_root].haps.gz
mus – an array of mutation rates to use for testing. In practice, for the [file_root].tab.gz and [file_root].haps.gz output, only the largest mutation rate of the mus array is used. However, for returning the maximum chi2 over tests, a maximum chi2 is returned for each of the mutation rates in order.
random_seed – seed for mutation-based sampling. When set to 0, uses the time to seed (default=0).
chromosome – used for writing summary statistics
snp_prefix – prefix used for writing summary statistics
min_maf – minimum MAF to test (default=-1 which means no minimum)
max_maf – maximum MAF to test (default=-1 which means no maximum)
write_bitset_threshold – variants with p-value less than the threshold are written to [file_root].haps.gz (default=-1, no writing)
calibration_factor – used in ARG-MLMA workflows, can be set to 1 to instead run linear regression (default=1)
concise_pvalue – return only two significant digits (default=True)
max_only – if True, does not write to file and only returns the max chi2 (default=False)
careful – if True, computes a residual sum of squares from the inferred beta as part of the standard error. If False, uses the norm of the phenotype as an approximation, like BOLT. The careful version will give slightly more significant p-values, especially for significant variants. Defaults to False.

Returns:

An array of maximum chi2 across the tests, one for each mutation rate in mus.

arg_needle_lib.bitset_volume_map(arg: arg_needle_lib_pybind.ARG, verbose: bool = False) → Dict[str, float]: Traverse the ARG and return a map of bitset string to volume

arg_needle_lib.compute_grm(arg: arg_needle_lib_pybind.ARG, alpha: float = 0, batch_size: int = 256, diploid: bool = True, min_maf: float = 0.0, max_maf: float = 0.5) → numpy.ndarray[numpy.float64[m, n]]

Compute GRM from existing mutations.

Parameters:

arg – arg_needle_lib.ARG object
alpha – if -1, the variants of branches are standardized before computing the GRM. If 0, the variants of branches are treated as-is and not standardized. Values in between interpolate between these behaviors (default=-1).
batch_size – batch size for GRM computations (default=True)
diploid – if True, leaves 2*i and 2*i+1 are treated as haploid pairs for sample i when computing the GRM (default=True)
min_maf – minimum MAF for mutations to include in the GRM (default=0)
max_maf – maximum MAF for mutations to include in the GRM (default=0.5)

Returns:

A numpy matrix with the GRM (no centering operations are performed).

arg_needle_lib.deserialize_arg(file_name: str, chunk_size: int = 1000, reserved_samples: int = -1)

Deserialize ARG from disk and create an arg_needle_lib.ARG object to return

Parameters:

file_name – the file name of the serialized ARG file
chunk_size – the chunk size for reading; small numbers will dramatically increase deserialization time
reserved_samples – parameter for ARG constructor allowing us to reserve additional samples for threading (default is to not reserve additional samples)

Returns:

an arg_needle_lib.ARG object

arg_needle_lib.exact_arg_grm(arg, alpha=-1, from_pos=-1, to_pos=-1, diploid=True, centering=True)

Computes the exact ARG-GRM up to Gower centering and row-column centering.

Parameters:

arg – arg_needle_lib.ARG object
alpha – if -1, the variants of branches are standardized before computing the GRM. If 0, the variants of branches are treated as-is and not standardized. Values in between interpolate between these behaviors (default=-1).
from_pos – start physical position for ARG-GRM computation (default=-1, use beginning of ARG)
to_pos – end physical position for ARG-GRM computation (default=-1, use end of ARG)
diploid – if True, leaves 2*i and 2*i+1 are treated as haploid pairs for sample i when computing the GRM (default=True)
centering – if True, applies row-column centering and Gower centering to the GRM before returning (default=True)

Returns:

A numpy matrix with the exact ARG-GRM up to Gower centering and row-column centering.

arg_needle_lib.generate_m_mutations(arg: arg_needle_lib_pybind.ARG, M: int, random_seed: int = 0) → None: Generate exactly M mutations and keep on the ARG

arg_needle_lib.generate_mutations(arg: arg_needle_lib_pybind.ARG, mu: float, random_seed: int = 0, num_mutations_hint: int = 0) → None: Generate mutations with a given rate and keep on the ARG

arg_needle_lib.get_genotype(arg: arg_needle_lib_pybind.ARG, mutation: arg_needle_lib_pybind.Mutation, diploid: bool = False) → List[int]: Get a vector representing the genotype of the mutation

arg_needle_lib.get_mutations_matrix(arg: arg_needle_lib_pybind.ARG, from_pos: float = -inf, to_pos: float = inf, include_left: bool = True, include_right: bool = False) → numpy.ndarray[numpy.int8[m, n]]: Return a mutation matrix corresponding to existing mutations on the ARG

arg_needle_lib.gower_center(mat)

Apply row-column centering to a numpy square matrix in-place.

Also returns the matrix.

Note: row column centering and Gower centering commute, i.e., they can be applied in either order.

arg_needle_lib.haploid_grm_to_diploid(hap_mat): Turns a haploid GRM into a diploid GRM by treating neighboring IDs as haploid pairs.

arg_needle_lib.impute(arg: arg_needle_lib_pybind.ARG, position: float, genotypes: List[int], old: bool = False) → List[float]: ARG-based imputation described in the ARG-Needle paper

arg_needle_lib.kc2_length_stab(arg1, arg2, num_stabs, lambdas=[1])

Compares two ARGs using stabbing queries, returning length-aware KC squared.

Parameters:

arg1 – the first ARG
arg2 – the second ARG
num_stabs – how many stabbing queries to perform. For i from 0 to num_stabs - 1 inclusive, the proportional positions along the genome are then sampled as i*phi (mod 1), where phi is the golden ratio. See also Supplementary Note 2 in https://doi.org/10.1038/s41588-023-01379-x
lambdas – array-like containing values of lambdas

Returns:

List of values containing length-aware KC squared for each lambda.

arg_needle_lib.kc2_special_stab(arg, arg_true, num_stabs, special_behavior=None, merge_fraction=0, random_kc_seed=0)

Compares two ARGs using stabbing queries, returning topology KC squared under special behavior.

Parameters:

arg1 – the first ARG, which can be perturbed by either merging nodes or randomly resolving polytomies.
arg2 – the second ARG, which can be perturbed by either merging nodes or randomly resolving polytomies.
num_stabs – how many stabbing queries to perform. For i from 0 to num_stabs - 1 inclusive, the proportional positions along the genome are then sampled as i*phi (mod 1), where phi is the golden ratio. See also Supplementary Note 2 in https://doi.org/10.1038/s41588-023-01379-x.
special_behavior – can be “random_resolve”, “heuristic_merge”, or “random_merge”. Each of these is described in Supplementary Note 2 of https://doi.org/10.1038/s41588-023-01379-x
merge_fraction – used by heuristic_merge and random_merge to figure out how many nodes to merge in each stabbing query tree. Default is 0.
random_kc_seed – used by random_resolve and random_merge to perform random behavior. The C++ RNG is seeded with this seed, and then iterates through the different stabbing query positions, consuming randomness from the RNG as necessary. Therefore different stabbing query trees will have different random behavior, because the RNG is only seeded once. Default is 0, but for random_resolve and random_merge, we check that the seed is nonzero before continuing so that the user is aware of the importance of seeding.

Returns:

The topology KC squared value, under the appropriate special behavior.

arg_needle_lib.kc2_tmrca_sv_stab(arg1, arg2, num_stabs)

Compares two ARGs using stabbing queries, returning topology KC squared, TMRCA MSE, and the split-vector (SV) metric squared and N-normalised.

The SV metric is computed as the L2 distance between the split-size vectors of arg1 and arg2. The split-size vector enumerates num_leaves(mrca(i, j)) for all leaf pairs {i, j}. This metric is due to Smith, https://doi.org/10.1093/sysbio/syab100.

Parameters:

arg1 – the first ARG
arg2 – the second ARG
num_stabs – how many stabbing queries to perform. For i from 0 to num_stabs - 1 inclusive, the proportional positions along the genome are then sampled as i*phi (mod 1), where phi is the golden ratio. See also Supplementary Note 2 in https://doi.org/10.1038/s41588-023-01379-x

Returns:

Dictionary with keys “kc2”, “tmrca_mse”, “sv2” and float values.

arg_needle_lib.local_volume(arg: arg_needle_lib_pybind.ARG, min_position: float, max_position: float) → float: Get the local arg volume

arg_needle_lib.monte_carlo_arg_grm(arg, monte_carlo_mu, seed=1234, alpha=-1, diploid=True, centering=True, min_maf=0.0, max_maf=0.5, batch_size=256)

Computes the Monte Carlo ARG-GRM.

This version wraps the C++ Eigen implementation and is more memory-efficient than the numpy version monte_carlo_arg_grm_numpy.

Parameters:

arg – arg_needle_lib.ARG object
monte_carlo_mu – mutation rate used for sampling mutations to compute the ARG-GRM
seed – random seed used for sampling (default=1234)
alpha – if -1, all variants are standardized before computing the GRM. If 0, variants are treated as-is and not standardized. Values in between interpolate between these behaviors (default=-1).
diploid – if True, leaves 2*i and 2*i+1 are treated as haploid pairs for sample i when computing the GRM (default=True)
centering – if True, applies row-column centering and Gower centering to the GRM before returning (default=True)
min_maf – exclusive minimum MAF (default=0.)
max_maf – inclusive maximum MAF (default=0.5)
batch_size – batch size when computing the ARG-GRM (default=256)

Returns:

A numpy matrix with the estimated Monte Carlo ARG-GRM.

arg_needle_lib.mutation_best(arg: arg_needle_lib_pybind.ARG, position: float, genotypes: List[int], random_seed: int = 0) → int: Hamming distance for best mutation placement on ARG at a position

arg_needle_lib.mutation_match(arg: arg_needle_lib_pybind.ARG, position: float, genotypes: List[int]) → bool: Boolean for whether genotypes can arise from a single mutation at position

arg_needle_lib.mutation_match_all(arg, ts_with_mutations, resolve_reps=0)

Tries to best match a given set of mutations on to an ARG, returning results.

Parameters:

arg – arg_needle_lib.ARG object (usually one that has been inferred)
ts_with_mutations – tskit.TreeSequence object containing mutations, that we want to check for on arg
resolve_reps (int) – how many times to randomly resolve polytomies in the arg object when matching. This is unnecessary if arg contains no polytomies, but otherwise allows for matching against resolved binary trees, averaging over several random resolutions. Default 0, which corresponds to no resolving of polytomies.

Returns:

Two lists (errors, diffs), each of length equal to the number of mutations in: ts_with_mutations. For each mutation, errors gives 0 if a mutation can be correctly placed on the ARG to give that genotype at that position, 1 if not, and a value in between if random resolution of polytomies sometimes gives a match and sometimes does not. diffs gives the Hamming distance between the best possible placed mutation and the genotype from ts_with_mutations, with the mean Hamming distance in the case of random resolution of polytomies.

arg_needle_lib.num_lineages(arg: arg_needle_lib_pybind.ARG, position: float, height: float) → int: Count the number of lineages at a given position and height

arg_needle_lib.rf_resolve_stab(arg1, arg2, num_stabs, resolve_seeds, min_mac=0, max_mac=0)

Computes scaled Robinson-Foulds using stabbing queries, with polytomies randomly resolved.

Parameters:

arg1 – the first ARG
arg2 – the second ARG
num_stabs – how many stabbing queries to perform. For i from 0 to num_stabs - 1 inclusive, the proportional positions along the genome are then sampled as i*phi (mod 1), where phi is the golden ratio. See also Supplementary Note 2 in https://doi.org/10.1038/s41588-023-01379-x.
resolve_seeds – a list of seeds to use for randomly resolving polytomies.
min_mac – min MAC (inclusive)
max_mac – max MAC (exclusive, value of 0 means set to maximum)

Returns:

Dictionary with key “scaled_robinson_foulds” and a float value, as well as the less: used keys “branch_recall” and “branch_precision”.

arg_needle_lib.rf_total_variation_stab(arg1, arg2, num_stabs, min_mac=0, max_mac=0, may_contain_polytomies=False)

Computes scaled Robinson-Foulds and total variation using stabbing queries.

In this version, polytomies are not resolved, which may bias the metric. See also rf_resolve_stab.

Parameters:

arg1 – the first ARG
arg2 – the second ARG
num_stabs – how many stabbing queries to perform. For i from 0 to num_stabs - 1 inclusive, the proportional positions along the genome are then sampled as i*phi (mod 1), where phi is the golden ratio. See also Supplementary Note 2 in https://doi.org/10.1038/s41588-023-01379-x.
min_mac – min MAC (inclusive)
max_mac – max MAC (exclusive, value of 0 means set to maximum)
may_contain_polytomies – if False, and the two ARGs do not have polytomies and there are no MAC filters, then a simple check is done verifying that calculations are done correctly. If there may be polytomies, then set to True. See also rf_resolve_stab which will randomly resolve the polytomies. Default = False.

Returns:

Dictionary with keys “scaled_robinson_foulds” and “total_variation” and float values, as: well as the less used keys “branch_recall”, “branch_precision”, “mutation_recall”, and “mutation_precision”.

arg_needle_lib.row_column_center(mat)

Apply row-column centering to a numpy square matrix in-place.

Also returns the matrix.

Note: row column centering and Gower centering commute, i.e., they can be applied in either order.

arg_needle_lib.serialize_arg(arg_to_serialize: ARG, file_name: str = 'arg.arg', chunk_size: int = 1000, compression='gzip', compression_opts=None, node_bounds=True, mutations=None, sites=None)

Serialize ARG to disk using HDF5.

By default, gzip compression with level 9 will be used, but available options are listed below. Compression can be turned off by passing compression=None.

Parameters:

arg_to_serialize – arg_needle_lib.ARG object to serialize
file_name – the file name to serialize the ARG to
chunk_size – the chunk size for writing; small numbers will dramatically increase serialization time
compression – whether to use compression; can save up to ~50% on disk at a runtime cost Refer to h5py documentation for details gzip provides good compression with compression_opts=9, with ~+50% runtime for serialization lzf provides poor compression, with little runtime overhead szip provides good compression, with little runtime overhead, but is not available on all systems
compression_opts – the level of compression requested for gzip: 9 for highest compression. Should be None if compression != ‘gzip’
node_bounds – whether to serialize node start/end positions. Default: True
mutations – whether to serialize mutations. Default: True if num_mutations > 0, otherwise False
sites – whether to serialize sites. Default: True if sites present, otherwise False

arg_needle_lib.stab_return_all_bitsets(arg: arg_needle_lib_pybind.ARG, position: float) → List[Tuple[int, float, List[bool]]]: All bitsets at a position of the ARG, each tuple with values allele count / length / bitset

arg_needle_lib.total_volume(arg: arg_needle_lib_pybind.ARG) → float: Get ARG volume

arg_needle_lib.trim_arg(arg: arg_needle_lib_pybind.ARG, trim_start: float, trim_end: float) → arg_needle_lib_pybind.ARG: Trim ARG down to the specified interval. Start and end positions do not include the offset.

arg_needle_lib.tskit_to_arg(ts, reserved_samples=-1)

Convert tskit.TreeSequence to arg_needle_lib.ARG using a custom constructor

Parameters:

ts – tskit.TreeSequence object
reserved_samples (int) – Parameter from ARG constructor allowing us to reserve additional samples for threading (default is to not reserve additional samples).

Returns:

An arg_needle_lib.ARG object.

arg_needle_lib.validate_serialized_arg(file_name: str)

Validate a serialized ARG on disk. This method returns True if the file is valid, and False otherwise. A message indicating the fault will be printed if the serialized ARG file is invalid.

Parameters:: file_name – the file name of the serialized ARG file
Returns:: whether the serialized ARG file is valid

arg_needle_lib.write_bitsets(arg: arg_needle_lib_pybind.ARG, file_root: str = '', diploid: bool = False, chromosome: int = 1, snp_prefix: str = '', min_mac: int = 0, max_mac: int = 0, write_dosage: bool = False, use_gz: bool = False, count_only: bool = False) → int: Write out ARG branches as bitsets

arg_needle_lib.write_grm(grm, num_sites, path_prefix, write_binary=False)

Writes a GRM to file in a format that can be used by GCTA.

Parameters:

grm – numpy matrix containing the GRM
num_sites – number of sites used to compute the GRM, expected but typically unused by downstream applications
path_prefix – string, results will be written to path_prefix.grm.*
write_binary – if True, write a binary file format that is observed to yield faster write times and can be read by GCTA. If False, write using gzip (default=False).

arg_needle_lib.write_mutations_to_haps(arg: arg_needle_lib_pybind.ARG, file_root: str, min_maf: float = 0, max_maf: float = 1, min_time: float = 0, max_time: float = inf) → None: Write mutations to disk in haps/samples format