Skip to content
Snippets Groups Projects
Commit 98a05c21 authored by Totaro Massimo G's avatar Totaro Massimo G
Browse files

fix: output file names

parent 4b4161b5
No related branches found
No related tags found
No related merge requests found
%% Cell type:markdown id: tags:
# SSN analysis
This is an automated script to generate a Sequence Similarity Network starting from a curated sequence alignment.
To run the analysis, click on the button in the cell below.
You will be prompted to upload a FASTA file containing the curated sequence alignment.
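Alternatively, a pre-computed BLAST result can be provided as a tab-separated (TSV) file with four columns and no header row: query ID, subject ID, e-value and bit score (i.e. BLAST `-outfmt "6 qseqid sseqid evalue bitscore"`).
To do so, hit cancel on the first FASTA upload prompt and then upload the TSV file.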
%% Cell type:code id: tags:
```
#@title Click below to run
import numpy
from google.colab import files
from pandas import read_csv
from zipfile import ZipFile
from os.path import isfile
from tempfile import NamedTemporaryFile
fileFasta = NamedTemporaryFile(suffix='.fasta').name
fileTsv = NamedTemporaryFile(suffix='.tsv').name
try:
print('Upload FASTA file to be analysed')
files.upload_file(fileFasta)
print('Analysing FASTA file...')
!apt-get install ncbi-blast+ > /dev/null && \
makeblastdb -in {fileFasta} -dbtype prot -parse_seqids -out 'DB' > /dev/null && \
blastp -db 'DB' -query {fileFasta} -out {fileTsv} -outfmt "6 qseqid sseqid evalue bitscore"
if not isfile(fileTsv):
print('The provided FASTA file could not be processed')
raise ValueError
except ValueError:
try:
print('Upload TSV file to be analysed')
files.upload_file(fileTsv)
except ValueError:
print('No file provided')
if isfile(fileTsv):
fileBOut = 'evs.csv'
fileEOut = 'bts.csv'
fileBOut = 'bts.csv'
fileEOut = 'ets.csv'
fileZip = NamedTemporaryFile(suffix='.zip').name
try:
df = read_csv(fileTsv,
sep='\t',
header=None,
index_col=[0, 1],
names=['T', 'S', 'e', 'b'],
dtype={'e':numpy.float64,
'b':numpy.float64})
e = df.pivot_table(index='T',
columns='S',
values='e',
aggfunc='min')
((numpy.minimum(e, e.T) + numpy.tril(numpy.full(e.shape, numpy.nan)))
.unstack()
.to_frame()
.sort_index(level=1)
.dropna()
.reset_index()
.to_csv(fileEOut,
sep=';',
index=False,
header=['Source','Target','evalue']))
b = df.pivot_table(index='T',
columns='S',
values='b',
aggfunc='max')
((numpy.maximum(b, b.T) + numpy.tril(numpy.full(b.shape, numpy.nan)))
.unstack()
.to_frame()
.sort_index(level=1)
.dropna()
.reset_index()
.to_csv(fileBOut,
sep=';',
index=False,
header=['Source','Target','bitscore']))
with ZipFile(fileZip, 'w') as f:
f.write(fileBOut)
f.write(fileEOut)
files.download(fileZip)
except:
print('Invalid file format')
```
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment