Get underlying data to calculate Rule Set 3 target scores
# Shared setup: worker count for the heavier parallel steps, and the sgRNA
# design table (tab-delimited) that drives the rest of the script.
max_n_jobs = multiprocessing.cpu_count()
design_df = pd.read_table('test_data/sgrna-designs.txt')

# Spot-check the sequence lookup: request genomic sequence for two IDs and
# confirm the first four characters of the first returned record's 'seq'.
query_ids = ["ENSG00000157764", "ENSG00000248378"]
sequence_records = post_transcript_sequence(query_ids, seq_type='genomic')
first_sequence = sequence_records[0]['seq']
assert first_sequence[:4] == 'CTTC'

# Build the per-transcript amino acid sequence table with two workers.
transcript_aa_seq_df = build_transcript_aa_seq_df(design_df, n_jobs=2)
# Bare expression kept so a notebook cell still displays the frame.
transcript_aa_seq_df
# Single-protein check: the overlap records for this translation ID must
# include an entry whose 'description' is exactly 'BRCT domain'.
brca1_overlap = get_translation_overlap('ENSP00000350283')
brca1_descriptions = pd.DataFrame(brca1_overlap)['description'].to_list()
assert 'BRCT domain' in brca1_descriptions

# Build the overlap table for every ID in the amino acid sequence frame,
# using two workers.
aa_seq_ids = transcript_aa_seq_df['id']
translation_overlap_df = build_translation_overlap_df(aa_seq_ids, n_jobs=2)
# Bare expression kept so a notebook cell still displays the frame.
translation_overlap_df
# Write the transcript-level data for the design table (two workers), then
# confirm the amino acid sequence parquet file exists at the expected path.
write_transcript_data(design_df, n_jobs=2)
aa_seq_path = os.path.join('./data/target_data/', 'aa_seqs.pq')
assert os.path.isfile(aa_seq_path)
# Look up exon and transcript metadata for one transcript and check it
# against known values.
# The third return value is bound to `chrom` rather than `chr` so that the
# built-in chr() is not shadowed for the remainder of the module.
exon_df, trans_sr, chrom = get_transcript_info('ENST00000259457')
assert chrom == '9'
assert trans_sr['length'] == 277  # number of amino acids
assert exon_df.shape[0] == 8  # eight exons
# Region-level check: mean conservation over the RPS20 interval should
# exceed mean conservation over the CD274 interval (both on hg38).
rps20_exon_conservation = get_conservation('8', 56074060, 56074159, 'hg38')
cd274_exon_conservation = get_conservation('9', 5457079, 5457420, 'hg38')
rps20_exon_mean = rps20_exon_conservation['conservation'].mean()
cd274_exon_mean = cd274_exon_conservation['conservation'].mean()
assert rps20_exon_mean > cd274_exon_mean

# Transcript-level check: the same ordering is expected when conservation
# is fetched for whole transcripts (second argument is the strand symbol
# passed through to the helper).
rps20_conservation_df = get_transcript_conservation('ENST00000009589', '-', 'hg38')
cd274_conservation_df = get_transcript_conservation('ENST00000381577', '+', 'hg38')
rps20_transcript_mean = rps20_conservation_df['conservation'].mean()
cd274_transcript_mean = cd274_conservation_df['conservation'].mean()
assert rps20_transcript_mean > cd274_transcript_mean
# Build the conservation table for every design, using all available cores.
conservation_df = build_conservation_df(design_df, n_jobs=max_n_jobs)

# Within each target transcript, the ranked conservation should track the
# raw conservation almost perfectly (first element of spearmanr's result,
# presumably scipy.stats' correlation coefficient, above 0.99).
per_transcript_rho = conservation_df.groupby('Target Transcript').apply(
    lambda grp: stats.spearmanr(grp['conservation'], grp['ranked_conservation'])[0]
)
assert (per_transcript_rho > 0.99).all()

# For one transcript, compare mean ranked conservation between two position
# windows. The original annotations label the first window "pfam" and the
# second "mobidblit"; the *3 presumably converts amino acid coordinates to
# nucleotide positions -- TODO confirm.
on_transcript = conservation_df['Transcript Base'] == 'ENST00000361337'
in_pfam_window = conservation_df['target position'].between(432*3, 663*3)
pfam_mean = conservation_df.loc[on_transcript & in_pfam_window,
                                'ranked_conservation'].mean()
on_transcript_again = conservation_df['Transcript Base'] == 'ENST00000361337'
in_mobidblit_window = conservation_df['target position'].between(36*3, 199*3)
mobidblit_mean = conservation_df.loc[on_transcript_again & in_mobidblit_window,
                                     'ranked_conservation'].mean()
assert pfam_mean > mobidblit_mean
# Write the conservation data for the design table using all available
# cores, then confirm the conservation parquet file exists at the expected
# path.
write_conservation_data(design_df, n_jobs=max_n_jobs)
conservation_path = os.path.join('./data/target_data/', 'conservation.pq')
assert os.path.isfile(conservation_path)