// Mirror of https://github.com/dotnetbio/bio.git (215 lines, 6.2 KiB, C#)
|
using System;
|
|||
|
using System.IO;
|
|||
|
|
|||
|
using Bio;
|
|||
|
using Bio.IO.BAM;
|
|||
|
using Bio.IO.SAM;
|
|||
|
using Bio.Util;
|
|||
|
using SamUtil.Properties;
|
|||
|
using System.Globalization;
|
|||
|
|
|||
|
namespace SamUtil
|
|||
|
{
|
|||
|
/// <summary>
/// Class implementing Import command of SAM Utility.
/// Parses the input file as BAM or SAM, optionally rebuilds the reference
/// sequence header from a reference list file, and writes the result in the
/// opposite format (SAM input is written as BAM, BAM input as SAM).
/// </summary>
public class Import
{
    #region Public Fields

    /// <summary>
    /// Input file name.
    /// </summary>
    public string InputFilename;

    /// <summary>
    /// Usage(Help)
    /// </summary>
    public bool Help;

    /// <summary>
    /// This file is TAB-delimited.
    /// Each line must contain the reference name and the length of the reference, one line for each distinct reference;
    /// additional fields are ignored.
    /// </summary>
    public string ReferenceListFile;

    /// <summary>
    /// Output file name. When null or empty, it is derived from
    /// <see cref="InputFilename"/> by appending ".sam" or ".bam".
    /// </summary>
    public string OutputFilename;

    #endregion

    #region Private Fields

    /// <summary>
    /// Extension appended to the input file name when a SAM output name is generated.
    /// </summary>
    private const string SamExtension = ".sam";

    /// <summary>
    /// Extension appended to the input file name when a BAM output name is generated.
    /// </summary>
    private const string BamExtension = ".bam";

    /// <summary>
    /// SAM object holding data from parsed file.
    /// </summary>
    private SequenceAlignmentMap _sequenceAlignmentMap;

    /// <summary>
    /// True when the input file was parsed as SAM, false when it was parsed as BAM.
    /// </summary>
    private bool _isSAM;

    /// <summary>
    /// Whether the output file name is auto generated.
    /// </summary>
    private bool _autoGeneratedOutputFilename;

    #endregion

    #region Public Methods

    /// <summary>
    /// Import converts between the SAM and BAM file formats.
    /// SAMUtil.exe import out.sam in.bam
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// Thrown when the reference list file does not exist, when no input file
    /// name was supplied, when parsing yields no alignment map, when a
    /// reference list line has fewer than two fields, or when parsing or
    /// writing fails.
    /// </exception>
    public void DoImport()
    {
        if (!string.IsNullOrEmpty(ReferenceListFile) && !File.Exists(ReferenceListFile))
        {
            throw new InvalidOperationException("File " + ReferenceListFile + " does not exist");
        }

        if (string.IsNullOrEmpty(InputFilename))
        {
            throw new InvalidOperationException(Resources.ImportHelp);
        }

        // Recomputed on every call so that a reused instance never reports a
        // stale "auto generated" state from a previous run.
        _autoGeneratedOutputFilename = string.IsNullOrEmpty(OutputFilename);

        PerformParse();
        if (_sequenceAlignmentMap == null)
        {
            throw new InvalidOperationException(Resources.EmptyFile);
        }

        if (!string.IsNullOrEmpty(ReferenceListFile))
        {
            CreateHeader();
        }

        PerformFormat();

        // The generated name is only reported when the caller did not choose one.
        if (_autoGeneratedOutputFilename)
        {
            Console.WriteLine(Properties.Resources.SuccessMessageWithOutputFileName, OutputFilename);
        }
    }

    #endregion

    #region Private Methods

    /// <summary>
    /// Writes the SAM object to <see cref="OutputFilename"/>: as BAM when the
    /// input was SAM, as SAM otherwise.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// Thrown when the formatter fails; the original failure is attached as
    /// the inner exception.
    /// </exception>
    private void PerformFormat()
    {
        if (_isSAM)
        {
            BAMFormatter bamFormatter = new BAMFormatter();
            try
            {
                bamFormatter.Format(_sequenceAlignmentMap, OutputFilename);
            }
            catch (Exception ex)
            {
                // Keep the original exception as InnerException so the stack trace is not lost.
                throw new InvalidOperationException(Resources.WriteBAM + Environment.NewLine + ex.Message, ex);
            }
        }
        else
        {
            SAMFormatter samFormatter = new SAMFormatter();
            try
            {
                samFormatter.Format(_sequenceAlignmentMap, OutputFilename);
            }
            catch (Exception ex)
            {
                // Keep the original exception as InnerException so the stack trace is not lost.
                throw new InvalidOperationException(Resources.WriteSAM + Environment.NewLine + ex.Message, ex);
            }
        }
    }

    /// <summary>
    /// Replaces the reference sequences in the parsed header with the entries
    /// listed in <see cref="ReferenceListFile"/>.
    /// The first two TAB-separated fields of each line are used as the
    /// reference name and length; any further fields are ignored.
    /// Empty lines are skipped.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// Thrown when a non-empty line has fewer than two fields.
    /// </exception>
    private void CreateHeader()
    {
        using (StreamReader reader = new StreamReader(ReferenceListFile))
        {
            _sequenceAlignmentMap.Header.ReferenceSequences.Clear();

            string line;

            // ReadLine returns null only at end of file, so an empty line in
            // the middle of the list no longer truncates the header.
            while ((line = reader.ReadLine()) != null)
            {
                if (line.Length == 0)
                {
                    continue;
                }

                string[] fields = line.Split(new string[] { "\t" }, StringSplitOptions.RemoveEmptyEntries);
                if (fields.Length < 2)
                {
                    throw new InvalidOperationException(Resources.ReferenceFile);
                }

                string name = fields[0];
                long length = long.Parse(fields[1], CultureInfo.InvariantCulture);
                _sequenceAlignmentMap.Header.ReferenceSequences.Add(new ReferenceSequenceInfo(name, length));
            }
        }
    }

    /// <summary>
    /// Parses the input file as BAM when Helper.IsBAM reports it as such, and
    /// as SAM otherwise. Records which format was read and, when no output
    /// file name was supplied, generates one by appending the opposite
    /// format's extension to the input file name.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// Thrown when the parser fails; the original failure is attached as the
    /// inner exception.
    /// </exception>
    private void PerformParse()
    {
        if (Helper.IsBAM(InputFilename))
        {
            BAMParser bamParser = new BAMParser();
            try
            {
                _sequenceAlignmentMap = bamParser.ParseOne<SequenceAlignmentMap>(InputFilename);
            }
            catch (Exception ex)
            {
                throw new InvalidOperationException(Resources.InvalidBAMFile, ex);
            }

            // Reset explicitly: a previous run on this instance may have left the flag set.
            _isSAM = false;
            if (string.IsNullOrEmpty(OutputFilename))
            {
                OutputFilename = InputFilename + SamExtension;
            }
        }
        else
        {
            SAMParser samParser = new SAMParser();
            try
            {
                _sequenceAlignmentMap = samParser.ParseOne<SequenceAlignmentMap>(InputFilename);
            }
            catch (Exception ex)
            {
                throw new InvalidOperationException(Resources.InvalidSAMFile, ex);
            }

            _isSAM = true;
            if (string.IsNullOrEmpty(OutputFilename))
            {
                OutputFilename = InputFilename + BamExtension;
            }
        }
    }

    #endregion
}
|
|||
|
}
|