Recently, I was working on a project dealing with a large number of coordinate files, which required me to convert each chain in coordinate files in MMCIF/PDBXML/MMTF/PDB format into PDB format. Some of the chains in the MMCIF files have long chain names, which causes the writepdb function to fail because the chain name is too long.
How can I set a specific chain name for a Structure?
I have found a function in biopython to solve this problem:
set_chain_info
( self , chain_id , chain_name , num_groups )
Set the chain information.
Parameters
- chain_id – the asym chain id from mmCIF
- chain_name – the auth chain id from mmCIF
- num_groups – the number of groups this chain has.
But I want to solve this problem in Julia. My script is as follows. How can I solve this problem?
using BioStructures #BioStructures Module to read and write pdb files.
"Module to split a Structure to many single chain pdb file."
function splitStructure(
    Structure,
    pdbid::AbstractString,
    library::AbstractString,
    singlechainDB::AbstractString,
)
    # Write every chain of every model in `Structure` to its own file inside
    # `singlechainDB`, named `<pdbid>-<model number>-<chain ID>.pdb`.
    # If `writepdb` throws for a chain, that chain is written as mmCIF to the same
    # stem with a `.cif` extension instead, and a message is printed.
    # `library` is not used here; it is kept so existing callers keep working.
    # Returns `nothing`.
    for model in Structure
        modelname = modelnumber(model)
        for chain in model
            chainname = chainid(chain)
            stem = "$singlechainDB/$pdbid-$modelname-$chainname"
            pdbpath = "$stem.pdb"
            try
                writepdb(pdbpath, chain)
            catch
                # writepdb may have created a partial file before throwing; drop it so
                # only the fallback output remains for this chain.
                rm(pdbpath; force=true)
                # The fallback is named `.cif` and reported as cif, so it must be
                # produced by the mmCIF writer rather than the MMTF one.
                writemmcif("$stem.cif", chain)
                println("$pdbid-$modelname-$chainname has been saved as cif")
            end
        end
    end
    return nothing
end
"Module to split a pdb file to many single chain pdb file."
function getStructuretospilt(
    pdbid::AbstractString,
    library::AbstractString,
    singlechainDB::AbstractString,
)
    # Load the entry `pdbid` and pass it to `splitStructure`.
    # Loading order: `retrievepdb` first; if that throws, download and read the entry
    # as MMCIF; if that throws too, download and read it as MMTF. An error raised by
    # the MMTF attempt is not caught and reaches the caller.
    # Returns `nothing`.
    loaded = try
        retrievepdb(pdbid, dir=library)
    catch
        try
            read(downloadpdb(pdbid, dir=library, format=MMCIF), MMCIF)
        catch
            read(downloadpdb(pdbid, dir=library, format=MMTF), MMTF)
        end
    end
    splitStructure(loaded, pdbid, library, singlechainDB)
    return nothing
end
"Module to download a pdb."
function downloadastructure(pdbid::AbstractString, library::AbstractString)
    # Download `pdbid` into `library`, trying the formats PDB, MMCIF and MMTF in that
    # order and returning the result of the first `downloadpdb` call that does not
    # throw. If all three throw, PDBXML is tried last without a guard, so its error
    # (if any) reaches the caller.
    for fmt in (PDB, MMCIF, MMTF)
        try
            return downloadpdb(pdbid, dir=library, format=fmt)
        catch
            # This format failed; move on to the next one.
        end
    end
    return downloadpdb(pdbid, dir=library, format=PDBXML)
end
function main()
    # Interactive entry point. Reads three paths from stdin: the structure library
    # directory, a list file holding comma-separated IDs on each line, and the output
    # directory. Every listed ID that matches a file stem in the library is passed to
    # `getStructuretospilt`; the IDs of one line are processed in parallel threads.
    # Returns `nothing`.
    print("What's path of your pdb library?\n")
    library = readline(stdin)
    print("What's path of your pdb list file?\n")
    pdblist = readline(stdin)
    print("What's path of your output pdb library?\n")
    singlechainDB = readline(stdin)

    # File stems (text before the first '.') already present in the library, kept in
    # a Set so each membership test is a hash lookup instead of a linear scan.
    existing = Set{String}(first(split(basename(file), ".")) for file in readdir(library))

    for pdbline in eachline(pdblist)
        # Trim whitespace around each ID and skip empty fields so that lines such as
        # "1abc, 2xyz" or a trailing comma do not produce IDs that can never match.
        pdbids = filter!(!isempty, [strip(id) for id in split(pdbline, ",")])
        Threads.@threads for pdbid in pdbids
            # NOTE(review): as in the original, only IDs already present in the
            # library are processed; confirm this is the intended direction.
            pdbid in existing && getStructuretospilt(pdbid, library, singlechainDB)
        end
    end
    return nothing
end

main()
Thank you for reading!