1、 使用FastQC软件对数据进行质控检测
fastqc -t 16 -o ${dir}/fastqc_report/ ${dir}/clean_data/*.fq.gz
2、 使用Trim Galore软件对三组数据进行质控:修剪质量值低于20的末端碱基(-q 20),并去掉修剪后长度小于20bp的reads(--length 20)
1.对HeLa细胞数据进行处理
trim_galore -q 20 --phred33 --stringency 3 --length 20 -e 0.1 -j 16 --paired /Data/lizexing/projects/xindi/Data/new/Data/CleanData/T_HeLa_T_PAR_CLIP_Clean_Data1.fq.gz /Data/lizexing/projects/xindi/Data/new/Data/CleanData/T_HeLa_T_PAR_CLIP_Clean_Data2.fq.gz
trim_galore -q 20 --phred33 --stringency 3 --length 20 -e 0.1 -j 16 --paired /Data/lizexing/projects/xindi/2021_11_16/CleanData/GFP_HeLa_Clean_Data1.fq.gz /Data/lizexing/projects/xindi/2021_11_16/CleanData/GFP_HeLa_Clean_Data2.fq.gz
trim_galore -q 20 --phred33 --stringency 3 --length 20 -e 0.1 -j 16 --paired /Data/lizexing/projects/xindi/2021_11_16/CleanData/Input_HeLa_Clean_Data1.fq.gz /Data/lizexing/projects/xindi/2021_11_16/CleanData/Input_HeLa_Clean_Data2.fq.gz
2.对HCT116细胞数据进行处理
trim_galore -q 20 --phred33 --stringency 3 --length 20 -e 0.1 -j 16 --paired /Data/lizexing/projects/xindi/Data/new/Data/CleanData/T_HCT116_T_PAR_CLIP_Clean_Data1.fq.gz /Data/lizexing/projects/xindi/Data/new/Data/CleanData/T_HCT116_T_PAR_CLIP_Clean_Data2.fq.gz
trim_galore -q 20 --phred33 --stringency 3 --length 20 -e 0.1 -j 16 --paired /Data/lizexing/projects/xindi/2021_11_16/CleanData/GFP_HCT116_Clean_Data1.fq.gz /Data/lizexing/projects/xindi/2021_11_16/CleanData/GFP_HCT116_Clean_Data2.fq.gz
trim_galore -q 20 --phred33 --stringency 3 --length 20 -e 0.1 -j 16 --paired /Data/lizexing/projects/xindi/2021_11_16/CleanData/Input_HCT116_Clean_Data1.fq.gz /Data/lizexing/projects/xindi/2021_11_16/CleanData/Input_HCT116_Clean_Data2.fq.gz
3.对293T细胞数据进行处理
trim_galore -q 20 --phred33 --stringency 3 --length 20 -e 0.1 --paired /Data/lizexing/projects/xindi/Data/new/Data/CleanData/T_293T_T_PAR_CLIP_Clean_Data1.fq.gz /Data/lizexing/projects/xindi/Data/new/Data/CleanData/T_293T_T_PAR_CLIP_Clean_Data2.fq.gz
trim_galore -q 20 --phred33 --stringency 3 --length 20 -e 0.1 -j 16 --paired /Data/lizexing/projects/xindi/2021_11_16/CleanData/GFP_293T_Clean_Data1.fq.gz /Data/lizexing/projects/xindi/2021_11_16/CleanData/GFP_293T_Clean_Data2.fq.gz
trim_galore -q 20 --phred33 --stringency 3 --length 20 -e 0.1 -j 16 --paired /Data/lizexing/projects/xindi/2021_11_16/CleanData/Input_293T_Clean_Data1.fq.gz /Data/lizexing/projects/xindi/2021_11_16/CleanData/Input_293T_Clean_Data2.fq.gz
3. 使用gffread-0.12.1软件将45S 和5S rRNA的GFF3注释文件转换为GTF格式
参考文章:gffcompare和gffread
Usage: gffread <input_gff> [-g <genomic_seqs_fasta> | <dir>][-s <seq_info.fsize>]
[-o <outfile>] [-t <trackname>] [-r [[<strand>]<chr>:]<start>..<end> [-R]]
[-CTVNJMKQAFPGUBHZWTOLE] [-w <exons.fa>] [-x <cds.fa>] [-y <tr_cds.fa>]
[-i <maxintron>] [--stream] [--bed] [--table <attrlist>] [--sort-by <ref.lst>]
(base) lizexing@bio:~/reference/h_45S_rDNA$ gffread U13369.1.gff3 -T -o U13369.1.gtf
(base) lizexing@bio:~/reference/h_5S_rDNA$ gffread NR_023363.1.gff3 -T -o NR_023363.1.gtf
4. 使用STAR软件对45S 和5S rRNA构建索引、对GRCh38.dna.primary_assembly、GRCh38.ncRNA、GRCh38.cds.all构建索引
参考文章:比对软件STAR的使用
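--runThreadN 指定运行时使用的线程数;
--genomeDir 构建索引时输出文件的目录(比对时用同一参数指定索引所在目录);
--genomeFastaFiles 基因组fasta文件的路径(是文件而不是目录,可同时给出多个文件);
--limitGenomeGenerateRAM 43749387189 构建索引时允许STAR使用的最大内存(单位:字节)。STAR构建索引消耗内存很大,需按机器可用内存设置该值以防出错,感谢孙小雨帮忙
--genomeSAindexNbases 默认值为14,对小基因组过大,应按 min(14, log2(基因组长度)/2 - 1) 调小。下面45S、5S和cds.all的建索引日志均出现了该WARNING(推荐值分别为6、2、12),建议加上此参数重新构建索引,否则比对时可能出现段错误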
(base) lizexing@bio:~$ STAR --runMode genomeGenerate --runThreadN 16 --genomeDir /Data/lizexing/reference/h_45S_rDNA/ --genomeFastaFiles /Data/lizexing/reference/h_45S_rDNA/U13369.1.fasta
Sep 05 14:14:23 ..... started STAR run
Sep 05 14:14:23 ... starting to generate Genome files
!!!!! WARNING: --genomeSAindexNbases 14 is too large for the genome size=42999, which may cause seg-fault at the mapping step. Re-run genome generation with recommended --genomeSAindexNbases 6
Sep 05 14:14:23 ... starting to sort Suffix Array. This may take a long time...
Sep 05 14:14:23 ... sorting Suffix Array chunks and saving them to disk...
Sep 05 14:14:23 ... loading chunks from disk, packing SA...
Sep 05 14:14:23 ... finished generating suffix array
Sep 05 14:14:23 ... generating Suffix Array index
Sep 05 14:14:26 ... completed Suffix Array index
Sep 05 14:14:26 ... writing Genome to disk ...
Sep 05 14:14:26 ... writing Suffix Array to disk ...
Sep 05 14:14:26 ... writing SAindex to disk
Sep 05 14:14:28 ..... finished successfully
(base) lizexing@bio:~$ STAR --runMode genomeGenerate --runThreadN 16 --genomeDir /Data/lizexing/reference/h_5S_rDNA/ --genomeFastaFiles /Data/lizexing/reference/h_5S_rDNA/NR_023363.1.fasta
Dec 15 19:47:24 ..... started STAR run
Dec 15 19:47:24 ... starting to generate Genome files
!!!!! WARNING: --genomeSAindexNbases 14 is too large for the genome size=121, which may cause seg-fault at the mapping step. Re-run genome generation with recommended --genomeSAindexNbases 2
Dec 15 19:47:24 ... starting to sort Suffix Array. This may take a long time...
Dec 15 19:47:24 ... sorting Suffix Array chunks and saving them to disk...
Dec 15 19:47:24 ... loading chunks from disk, packing SA...
Dec 15 19:47:24 ... finished generating suffix array
Dec 15 19:47:24 ... generating Suffix Array index
Dec 15 19:47:27 ... completed Suffix Array index
Dec 15 19:47:27 ... writing Genome to disk ...
Dec 15 19:47:27 ... writing Suffix Array to disk ...
Dec 15 19:47:27 ... writing SAindex to disk
Dec 15 19:47:31 ..... finished successfully
(base) lizexing@bio:~/reference/Ensembl_GRCh38$ STAR --runMode genomeGenerate --runThreadN 40 --limitGenomeGenerateRAM 82424365322 --genomeDir /Data/lizexing/reference/Ensembl_GRCh38/star_dna_primary_assembly_index --genomeFastaFiles /Data/lizexing/reference/Ensembl_GRCh38/Homo_sapiens.GRCh38.dna.primary_assembly.fa
Mar 06 14:29:42 ..... started STAR run
Mar 06 14:29:42 ... starting to generate Genome files
Mar 06 14:30:58 ... starting to sort Suffix Array. This may take a long time...
Mar 06 14:31:18 ... sorting Suffix Array chunks and saving them to disk...
Mar 06 14:44:13 ... loading chunks from disk, packing SA...
Mar 06 14:45:46 ... finished generating suffix array
Mar 06 14:45:46 ... generating Suffix Array index
Mar 06 14:49:53 ... completed Suffix Array index
Mar 06 14:49:53 ... writing Genome to disk ...
Mar 06 14:49:55 ... writing Suffix Array to disk ...
Mar 06 14:50:18 ... writing SAindex to disk
Mar 06 14:50:20 ..... finished successfully
(base) lizexing@bio:~/reference/Ensembl_GRCh38$ STAR --runMode genomeGenerate --runThreadN 16 --limitGenomeGenerateRAM 82424365322 --genomeDir /Data/lizexing/reference/Ensembl_GRCh38/star_cds_all_index --genomeFastaFiles /Data/lizexing/reference/Ensembl_GRCh38/Homo_sapiens.GRCh38.cds.all.fa
Mar 05 10:59:02 ..... started STAR run
Mar 05 10:59:03 ... starting to generate Genome files
!!!!! WARNING: --genomeSAindexNbases 14 is too large for the genome size=137654284, which may cause seg-fault at the mapping step. Re-run genome generation with recommended --genomeSAindexNbases 12
Mar 05 11:00:53 ... starting to sort Suffix Array. This may take a long time...
Mar 05 11:02:49 ... sorting Suffix Array chunks and saving them to disk...
Mar 05 11:04:45 ... loading chunks from disk, packing SA...
Mar 05 11:05:50 ... finished generating suffix array
Mar 05 11:05:50 ... generating Suffix Array index
Mar 05 11:06:41 ... completed Suffix Array index
Mar 05 11:06:41 ... writing Genome to disk ...
Mar 05 11:07:17 ... writing Suffix Array to disk ...
Mar 05 11:07:18 ... writing SAindex to disk
Mar 05 11:07:19 ..... finished successfully
(base) lizexing@bio:~/reference/Ensembl_GRCh38$ STAR --runMode genomeGenerate --runThreadN 16 --limitGenomeGenerateRAM 82424365322 --genomeDir /Data/lizexing/reference/Ensembl_GRCh38/star_ncrna_index --genomeFastaFiles /Data/lizexing/reference/Ensembl_GRCh38/Homo_sapiens.GRCh38.ncrna.fa
5. STAR比对用法和结果说明
Usage: STAR [options]... --genomeDir /path/to/genome/index/ --readFilesIn R1.fq R2.fq
STAR --runThreadN 40 \
--runMode alignReads \
--readFilesCommand zcat \
--quantMode TranscriptomeSAM GeneCounts \
--sjdbGTFfile /Data/lizexing/reference/h_45S_rDNA/U13369.1.gtf \
--twopassMode Basic \
--outSAMtype BAM Unsorted \
--outSAMunmapped None \
--genomeDir /gpfs/home/fangy04/downloads/STAR_index/GRCh38/ \
--readFilesIn /gpfs/home/fangy04/downloads/SRR8112732_1.fastq.gz /gpfs/home/fangy04/downloads/SRR8112732_2.fastq.gz \
--outFileNamePrefix DRB_TT_seq_SRR8112732
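--outReadsUnmapped Fastx:将未比对上的reads单独输出到 Unmapped.out.mate1/mate2 文件(默认None,不输出)
--outSAMunmapped Within:将未比对上的reads保留在SAM/BAM主输出文件中(默认None,不输出)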
9216920116 Jun 28 17:06 DRB_TT_seq_SRR8112732Aligned.out.bam
1166235552 Jun 28 17:06 DRB_TT_seq_SRR8112732Aligned.toTranscriptome.out.bam
2034 Jun 28 17:06 DRB_TT_seq_SRR8112732Log.final.out
20188 Jun 28 17:06 DRB_TT_seq_SRR8112732Log.out
2571 Jun 28 17:06 DRB_TT_seq_SRR8112732Log.progress.out
1585521 Jun 28 17:06 DRB_TT_seq_SRR8112732ReadsPerGene.out.tab
6732305 Jun 28 17:06 DRB_TT_seq_SRR8112732SJ.out.tab
8192 Jun 28 16:51 DRB_TT_seq_SRR8112732_STARgenome
8192 Jun 28 16:51 DRB_TT_seq_SRR8112732_STARpass1
6. 使用STAR软件对三组数据与45S rRNA进行比对
1、对HeLa测序数据进行比对
STAR --runThreadN 40 --runMode alignReads --readFilesCommand zcat --quantMode TranscriptomeSAM GeneCounts --sjdbGTFfile /Data/lizexing/reference/h_45S_rDNA/U13369.1.gtf --twopassMode Basic --outSAMtype BAM Unsorted --genomeDir /Data/lizexing/reference/h_45S_rDNA/ --readFilesIn /Data/lizexing/projects/xindi/Data/new/Data/CleanData/T_HeLa_T_PAR_CLIP_Clean_Data1_val_1.fq.gz /Data/lizexing/projects/xindi/Data/new/Data/CleanData/T_HeLa_T_PAR_CLIP_Clean_Data2_val_2.fq.gz --outFileNamePrefix HeLa-val --outReadsUnmapped Fastx
STAR --runThreadN 40 --runMode alignReads --readFilesCommand zcat --quantMode TranscriptomeSAM GeneCounts --sjdbGTFfile /Data/lizexing/reference/h_45S_rDNA/U13369.1.gtf --twopassMode Basic --outSAMtype BAM Unsorted --genomeDir /Data/lizexing/reference/h_45S_rDNA/ --readFilesIn /Data/lizexing/projects/xindi/2021_11_16/CleanData/GFP_HeLa_Clean_Data1_val_1.fq.gz /Data/lizexing/projects/xindi/2021_11_16/CleanData/GFP_HeLa_Clean_Data2_val_2.fq.gz --outFileNamePrefix GFP_HeLa_val --outReadsUnmapped Fastx
STAR --runThreadN 40 --runMode alignReads --readFilesCommand zcat --quantMode TranscriptomeSAM GeneCounts --sjdbGTFfile /Data/lizexing/reference/h_45S_rDNA/U13369.1.gtf --twopassMode Basic --outSAMtype BAM Unsorted --genomeDir /Data/lizexing/reference/h_45S_rDNA/ --readFilesIn /Data/lizexing/projects/xindi/2021_11_16/CleanData/Input_HeLa_Clean_Data1_val_1.fq.gz /Data/lizexing/projects/xindi/2021_11_16/CleanData/Input_HeLa_Clean_Data2_val_2.fq.gz --outFileNamePrefix Input_HeLa_val --outReadsUnmapped Fastx
2、对HCT116测序数据进行比对
STAR --runThreadN 40 --runMode alignReads --readFilesCommand zcat --quantMode TranscriptomeSAM GeneCounts --sjdbGTFfile /Data/lizexing/reference/h_45S_rDNA/U13369.1.gtf --twopassMode Basic --outSAMtype BAM Unsorted --genomeDir /Data/lizexing/reference/h_45S_rDNA/ --readFilesIn /Data/lizexing/projects/xindi/Data/new/Data/CleanData/T_HCT116_T_PAR_CLIP_Clean_Data1_val_1.fq.gz /Data/lizexing/projects/xindi/Data/new/Data/CleanData/T_HCT116_T_PAR_CLIP_Clean_Data2_val_2.fq.gz --outFileNamePrefix HCT116-val --outReadsUnmapped Fastx
STAR --runThreadN 40 --runMode alignReads --readFilesCommand zcat --quantMode TranscriptomeSAM GeneCounts --sjdbGTFfile /Data/lizexing/reference/h_45S_rDNA/U13369.1.gtf --twopassMode Basic --outSAMtype BAM Unsorted --genomeDir /Data/lizexing/reference/h_45S_rDNA/ --readFilesIn /Data/lizexing/projects/xindi/2021_11_16/CleanData/GFP_HCT116_Clean_Data1_val_1.fq.gz /Data/lizexing/projects/xindi/2021_11_16/CleanData/GFP_HCT116_Clean_Data2_val_2.fq.gz --outFileNamePrefix GFP_HCT116_val --outReadsUnmapped Fastx
STAR --runThreadN 40 --runMode alignReads --readFilesCommand zcat --quantMode TranscriptomeSAM GeneCounts --sjdbGTFfile /Data/lizexing/reference/h_45S_rDNA/U13369.1.gtf --twopassMode Basic --outSAMtype BAM Unsorted --genomeDir /Data/lizexing/reference/h_45S_rDNA/ --readFilesIn /Data/lizexing/projects/xindi/2022_03_05/TreatData/Input/CleanData/Input_HCT116_Clean_Data1_val_1.fq.gz /Data/lizexing/projects/xindi/2022_03_05/TreatData/Input/CleanData/Input_HCT116_Clean_Data2_val_2.fq.gz --outFileNamePrefix Input_HCT116_val --outReadsUnmapped Fastx
3、对293T测序数据进行比对
STAR --runThreadN 40 --runMode alignReads --readFilesCommand zcat --quantMode TranscriptomeSAM GeneCounts --sjdbGTFfile /Data/lizexing/reference/h_45S_rDNA/U13369.1.gtf --twopassMode Basic --outSAMtype BAM Unsorted --genomeDir /Data/lizexing/reference/h_45S_rDNA/ --readFilesIn /Data/lizexing/projects/xindi/Data/new/Data/CleanData/T_293T_T_PAR_CLIP_Clean_Data1_val_1.fq.gz /Data/lizexing/projects/xindi/Data/new/Data/CleanData/T_293T_T_PAR_CLIP_Clean_Data2_val_2.fq.gz --outFileNamePrefix 293T-val --outReadsUnmapped Fastx
STAR --runThreadN 40 --runMode alignReads --readFilesCommand zcat --quantMode TranscriptomeSAM GeneCounts --sjdbGTFfile /Data/lizexing/reference/h_45S_rDNA/U13369.1.gtf --twopassMode Basic --outSAMtype BAM Unsorted --genomeDir /Data/lizexing/reference/h_45S_rDNA/ --readFilesIn /Data/lizexing/projects/xindi/2021_11_16/CleanData/GFP_293T_Clean_Data1_val_1.fq.gz /Data/lizexing/projects/xindi/2021_11_16/CleanData/GFP_293T_Clean_Data2_val_2.fq.gz --outFileNamePrefix GFP_293T_val --outReadsUnmapped Fastx
STAR --runThreadN 40 --runMode alignReads --readFilesCommand zcat --quantMode TranscriptomeSAM GeneCounts --sjdbGTFfile /Data/lizexing/reference/h_45S_rDNA/U13369.1.gtf --twopassMode Basic --outSAMtype BAM Unsorted --genomeDir /Data/lizexing/reference/h_45S_rDNA/ --readFilesIn /Data/lizexing/projects/xindi/2021_11_16/CleanData/Input_293T_Clean_Data1_val_1.fq.gz /Data/lizexing/projects/xindi/2021_11_16/CleanData/Input_293T_Clean_Data2_val_2.fq.gz --outFileNamePrefix Input_293T_val --outReadsUnmapped Fastx
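8. 使用STAR软件对三组数据中未比对上45S rRNA的序列(Unmapped.out.mate1/mate2)与GRCh38.ncrna比对(注意:同一细胞系的不同样本(TopBP/GFP/Input)使用了相同的--outFileNamePrefix,需分别在各自的样本目录下运行,否则输出文件会互相覆盖)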
1、对HeLa测序数据进行比对
STAR --runThreadN 40 --runMode alignReads --twopassMode Basic --outSAMtype BAM Unsorted --genomeDir /Data/lizexing/reference/Ensembl_GRCh38/star_ncrna_index/ --readFilesIn /Data/lizexing/projects/xindi/2022_03_05/TreatData/TopBP/HeLa/45SRNA/HeLa-valUnmapped.out.mate1 /Data/lizexing/projects/xindi/2022_03_05/TreatData/TopBP/HeLa/45SRNA/HeLa-valUnmapped.out.mate2 --outFileNamePrefix HeLa_ncrna_val --outReadsUnmapped Fastx
STAR --runThreadN 40 --runMode alignReads --twopassMode Basic --outSAMtype BAM Unsorted --genomeDir /Data/lizexing/reference/Ensembl_GRCh38/star_ncrna_index/ --readFilesIn /Data/lizexing/projects/xindi/2022_03_05/TreatData/GFP/HeLa/45SRNA/GFP_HeLa_valUnmapped.out.mate1 /Data/lizexing/projects/xindi/2022_03_05/TreatData/GFP/HeLa/45SRNA/GFP_HeLa_valUnmapped.out.mate2 --outFileNamePrefix HeLa_ncrna_val --outReadsUnmapped Fastx
2、对HCT116测序数据进行比对
STAR --runThreadN 40 --runMode alignReads --twopassMode Basic --outSAMtype BAM Unsorted --genomeDir /Data/lizexing/reference/Ensembl_GRCh38/star_ncrna_index/ --readFilesIn /Data/lizexing/projects/xindi/2022_03_05/TreatData/TopBP/HCT116/45SRNA/HCT116-valUnmapped.out.mate1 /Data/lizexing/projects/xindi/2022_03_05/TreatData/TopBP/HCT116/45SRNA/HCT116-valUnmapped.out.mate2 --outFileNamePrefix HCT116_ncrna_val --outReadsUnmapped Fastx
STAR --runThreadN 40 --runMode alignReads --twopassMode Basic --outSAMtype BAM Unsorted --genomeDir /Data/lizexing/reference/Ensembl_GRCh38/star_ncrna_index/ --readFilesIn /Data/lizexing/projects/xindi/2022_03_05/TreatData/GFP/HCT116/45SRNA/GFP_HCT116_valUnmapped.out.mate1 /Data/lizexing/projects/xindi/2022_03_05/TreatData/GFP/HCT116/45SRNA/GFP_HCT116_valUnmapped.out.mate2 --outFileNamePrefix HCT116_ncrna_val --outReadsUnmapped Fastx
STAR --runThreadN 40 --runMode alignReads --twopassMode Basic --outSAMtype BAM Unsorted --genomeDir /Data/lizexing/reference/Ensembl_GRCh38/star_ncrna_index/ --readFilesIn /Data/lizexing/projects/xindi/2022_03_05/TreatData/Input/HCT116/45SRNA/Input_HCT116_valUnmapped.out.mate1 /Data/lizexing/projects/xindi/2022_03_05/TreatData/Input/HCT116/45SRNA/Input_HCT116_valUnmapped.out.mate2 --outFileNamePrefix HCT116_ncrna_val --outReadsUnmapped Fastx
3、对293T测序数据进行比对
STAR --runThreadN 40 --runMode alignReads --twopassMode Basic --outSAMtype BAM Unsorted --genomeDir /Data/lizexing/reference/Ensembl_GRCh38/star_ncrna_index/ --readFilesIn /Data/lizexing/projects/xindi/2022_03_05/TreatData/TopBP/293T/45SRNA/293T-valUnmapped.out.mate1 /Data/lizexing/projects/xindi/2022_03_05/TreatData/TopBP/293T/45SRNA/293T-valUnmapped.out.mate2 --outFileNamePrefix 293T_ncrna_val --outReadsUnmapped Fastx
STAR --runThreadN 40 --runMode alignReads --twopassMode Basic --outSAMtype BAM Unsorted --genomeDir /Data/lizexing/reference/Ensembl_GRCh38/star_ncrna_index/ --readFilesIn /Data/lizexing/projects/xindi/2022_03_05/TreatData/GFP/293T/45SRNA/GFP_293T_valUnmapped.out.mate1 /Data/lizexing/projects/xindi/2022_03_05/TreatData/GFP/293T/45SRNA/GFP_293T_valUnmapped.out.mate2 --outFileNamePrefix 293T_ncrna_val --outReadsUnmapped Fastx
STAR --runThreadN 40 --runMode alignReads --twopassMode Basic --outSAMtype BAM Unsorted --genomeDir /Data/lizexing/reference/Ensembl_GRCh38/star_ncrna_index/ --readFilesIn /Data/lizexing/projects/xindi/2022_03_05/TreatData/Input/293T/45SRNA/Input_293T_valUnmapped.out.mate1 /Data/lizexing/projects/xindi/2022_03_05/TreatData/Input/293T/45SRNA/Input_293T_valUnmapped.out.mate2 --outFileNamePrefix 293T_ncrna_val --outReadsUnmapped Fastx
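9. 使用STAR软件对三组数据中未比对上45S rRNA的序列(Unmapped.out.mate1/mate2)与GRCh38.cds.all比对(注意:同一细胞系的不同样本(TopBP/GFP/Input)使用了相同的--outFileNamePrefix,需分别在各自的样本目录下运行,否则输出文件会互相覆盖)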
1、对HeLa测序数据进行比对
STAR --runThreadN 40 --runMode alignReads --twopassMode Basic --outSAMtype BAM Unsorted --genomeDir /Data/lizexing/reference/Ensembl_GRCh38/star_cds_all_index/ --readFilesIn /Data/lizexing/projects/xindi/2022_03_05/TreatData/TopBP/HeLa/45SRNA/HeLa-valUnmapped.out.mate1 /Data/lizexing/projects/xindi/2022_03_05/TreatData/TopBP/HeLa/45SRNA/HeLa-valUnmapped.out.mate2 --outFileNamePrefix HeLa_cds_val --outReadsUnmapped Fastx
STAR --runThreadN 40 --runMode alignReads --twopassMode Basic --outSAMtype BAM Unsorted --genomeDir /Data/lizexing/reference/Ensembl_GRCh38/star_cds_all_index/ --readFilesIn /Data/lizexing/projects/xindi/2022_03_05/TreatData/GFP/HeLa/45SRNA/GFP_HeLa_valUnmapped.out.mate1 /Data/lizexing/projects/xindi/2022_03_05/TreatData/GFP/HeLa/45SRNA/GFP_HeLa_valUnmapped.out.mate2 --outFileNamePrefix HeLa_cds_val --outReadsUnmapped Fastx
2、对HCT116测序数据进行比对
STAR --runThreadN 40 --runMode alignReads --twopassMode Basic --outSAMtype BAM Unsorted --genomeDir /Data/lizexing/reference/Ensembl_GRCh38/star_cds_all_index/ --readFilesIn /Data/lizexing/projects/xindi/2022_03_05/TreatData/TopBP/HCT116/45SRNA/HCT116-valUnmapped.out.mate1 /Data/lizexing/projects/xindi/2022_03_05/TreatData/TopBP/HCT116/45SRNA/HCT116-valUnmapped.out.mate2 --outFileNamePrefix HCT116_cds_val --outReadsUnmapped Fastx
STAR --runThreadN 40 --runMode alignReads --twopassMode Basic --outSAMtype BAM Unsorted --genomeDir /Data/lizexing/reference/Ensembl_GRCh38/star_cds_all_index/ --readFilesIn /Data/lizexing/projects/xindi/2022_03_05/TreatData/GFP/HCT116/45SRNA/GFP_HCT116_valUnmapped.out.mate1 /Data/lizexing/projects/xindi/2022_03_05/TreatData/GFP/HCT116/45SRNA/GFP_HCT116_valUnmapped.out.mate2 --outFileNamePrefix HCT116_cds_val --outReadsUnmapped Fastx
STAR --runThreadN 40 --runMode alignReads --twopassMode Basic --outSAMtype BAM Unsorted --genomeDir /Data/lizexing/reference/Ensembl_GRCh38/star_cds_all_index/ --readFilesIn /Data/lizexing/projects/xindi/2022_03_05/TreatData/Input/HCT116/45SRNA/Input_HCT116_valUnmapped.out.mate1 /Data/lizexing/projects/xindi/2022_03_05/TreatData/Input/HCT116/45SRNA/Input_HCT116_valUnmapped.out.mate2 --outFileNamePrefix HCT116_cds_val --outReadsUnmapped Fastx
3、对293T测序数据进行比对
STAR --runThreadN 40 --runMode alignReads --twopassMode Basic --outSAMtype BAM Unsorted --genomeDir /Data/lizexing/reference/Ensembl_GRCh38/star_cds_all_index/ --readFilesIn /Data/lizexing/projects/xindi/2022_03_05/TreatData/TopBP/293T/45SRNA/293T-valUnmapped.out.mate1 /Data/lizexing/projects/xindi/2022_03_05/TreatData/TopBP/293T/45SRNA/293T-valUnmapped.out.mate2 --outFileNamePrefix 293T_cds_val --outReadsUnmapped Fastx
STAR --runThreadN 40 --runMode alignReads --twopassMode Basic --outSAMtype BAM Unsorted --genomeDir /Data/lizexing/reference/Ensembl_GRCh38/star_cds_all_index/ --readFilesIn /Data/lizexing/projects/xindi/2022_03_05/TreatData/GFP/293T/45SRNA/GFP_293T_valUnmapped.out.mate1 /Data/lizexing/projects/xindi/2022_03_05/TreatData/GFP/293T/45SRNA/GFP_293T_valUnmapped.out.mate2 --outFileNamePrefix 293T_cds_val --outReadsUnmapped Fastx
STAR --runThreadN 40 --runMode alignReads --twopassMode Basic --outSAMtype BAM Unsorted --genomeDir /Data/lizexing/reference/Ensembl_GRCh38/star_cds_all_index/ --readFilesIn /Data/lizexing/projects/xindi/2022_03_05/TreatData/Input/293T/45SRNA/Input_293T_valUnmapped.out.mate1 /Data/lizexing/projects/xindi/2022_03_05/TreatData/Input/293T/45SRNA/Input_293T_valUnmapped.out.mate2 --outFileNamePrefix 293T_cds_val --outReadsUnmapped Fastx
10. 使用featureCounts软件对三组数据进行reads计数(read summarization)
featureCounts -T 32 -a /Data/lizexing/reference/h_45S_rDNA/U13369.1.gtf -p -B -C -f -t exon -g gene_id \
-o /Data/lizexing/projects/xindi/2022_03_05/TreatData/GFP/HeLa/45SRNA/HeLA_val.read.count /Data/lizexing/projects/xindi/2022_03_05/TreatData/GFP/HeLa/45SRNA/GFP_HeLa_valAligned.out.bam.sort
|