Skip to content
Snippets Groups Projects
Commit 98a05c21 authored by Totaro Massimo G's avatar Totaro Massimo G
Browse files

fix: output file names

parent 4b4161b5
No related branches found
No related tags found
No related merge requests found
%% Cell type:markdown id: tags:
# SSN analysis
This is an automated script to generate a Sequence Similarity Network starting from a curated sequence alignment.
To run the analysis, click on the button in the cell below.
You will be prompted to upload a FASTA file containing the curated sequence alignment.
Alternatively, a BLAST-preanalysed database can be provided.
Just hit cancel on the first FASTA upload prompt and then provide the TSV file.
%% Cell type:code id: tags:
```
#@title Click below to run #@title Click below to run
import numpy import numpy
from google.colab import files from google.colab import files
from pandas import read_csv from pandas import read_csv
from zipfile import ZipFile from zipfile import ZipFile
from os.path import isfile from os.path import isfile
from tempfile import NamedTemporaryFile from tempfile import NamedTemporaryFile
fileFasta = NamedTemporaryFile(suffix='.fasta').name fileFasta = NamedTemporaryFile(suffix='.fasta').name
fileTsv = NamedTemporaryFile(suffix='.tsv').name fileTsv = NamedTemporaryFile(suffix='.tsv').name
try: try:
print('Upload FASTA file to be analysed') print('Upload FASTA file to be analysed')
files.upload_file(fileFasta) files.upload_file(fileFasta)
print('Analysing FASTA file...') print('Analysing FASTA file...')
!apt-get install ncbi-blast+ > /dev/null && \ !apt-get install ncbi-blast+ > /dev/null && \
makeblastdb -in {fileFasta} -dbtype prot -parse_seqids -out 'DB' > /dev/null && \ makeblastdb -in {fileFasta} -dbtype prot -parse_seqids -out 'DB' > /dev/null && \
blastp -db 'DB' -query {fileFasta} -out {fileTsv} -outfmt "6 qseqid sseqid evalue bitscore" blastp -db 'DB' -query {fileFasta} -out {fileTsv} -outfmt "6 qseqid sseqid evalue bitscore"
if not isfile(fileTsv): if not isfile(fileTsv):
print('The provided FASTA file could not be processed') print('The provided FASTA file could not be processed')
raise ValueError raise ValueError
except ValueError: except ValueError:
try: try:
print('Upload TSV file to be analysed') print('Upload TSV file to be analysed')
files.upload_file(fileTsv) files.upload_file(fileTsv)
except ValueError: except ValueError:
print('No file provided') print('No file provided')
if isfile(fileTsv): if isfile(fileTsv):
fileBOut = 'evs.csv' fileBOut = 'bts.csv'
fileEOut = 'bts.csv' fileEOut = 'ets.csv'
fileZip = NamedTemporaryFile(suffix='.zip').name fileZip = NamedTemporaryFile(suffix='.zip').name
try: try:
df = read_csv(fileTsv, df = read_csv(fileTsv,
sep='\t', sep='\t',
header=None, header=None,
index_col=[0, 1], index_col=[0, 1],
names=['T', 'S', 'e', 'b'], names=['T', 'S', 'e', 'b'],
dtype={'e':numpy.float64, dtype={'e':numpy.float64,
'b':numpy.float64}) 'b':numpy.float64})
e = df.pivot_table(index='T', e = df.pivot_table(index='T',
columns='S', columns='S',
values='e', values='e',
aggfunc='min') aggfunc='min')
((numpy.minimum(e, e.T) + numpy.tril(numpy.full(e.shape, numpy.nan))) ((numpy.minimum(e, e.T) + numpy.tril(numpy.full(e.shape, numpy.nan)))
.unstack() .unstack()
.to_frame() .to_frame()
.sort_index(level=1) .sort_index(level=1)
.dropna() .dropna()
.reset_index() .reset_index()
.to_csv(fileEOut, .to_csv(fileEOut,
sep=';', sep=';',
index=False, index=False,
header=['Source','Target','evalue'])) header=['Source','Target','evalue']))
b = df.pivot_table(index='T', b = df.pivot_table(index='T',
columns='S', columns='S',
values='b', values='b',
aggfunc='max') aggfunc='max')
((numpy.maximum(b, b.T) + numpy.tril(numpy.full(b.shape, numpy.nan))) ((numpy.maximum(b, b.T) + numpy.tril(numpy.full(b.shape, numpy.nan)))
.unstack() .unstack()
.to_frame() .to_frame()
.sort_index(level=1) .sort_index(level=1)
.dropna() .dropna()
.reset_index() .reset_index()
.to_csv(fileBOut, .to_csv(fileBOut,
sep=';', sep=';',
index=False, index=False,
header=['Source','Target','bitscore'])) header=['Source','Target','bitscore']))
with ZipFile(fileZip, 'w') as f: with ZipFile(fileZip, 'w') as f:
f.write(fileBOut) f.write(fileBOut)
f.write(fileEOut) f.write(fileEOut)
files.download(fileZip) files.download(fileZip)
except: except:
print('Invalid file format') print('Invalid file format')
```
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment