update
Browse files
examples/fsmn_vad_by_webrtcvad/run.sh
CHANGED
|
@@ -2,18 +2,25 @@
|
|
| 2 |
|
| 3 |
: <<'END'
|
| 4 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
bash run.sh --stage 1 --stop_stage 1 --system_version centos \
|
| 6 |
--file_folder_name fsmn-vad-by-webrtcvad-nx2-dns3 \
|
| 7 |
--final_model_name fsmn-vad-by-webrtcvad-nx2-dns3 \
|
| 8 |
-
--
|
| 9 |
-
--
|
| 10 |
/data/tianxing/HuggingDatasets/nx_noise/data/speech/nx-speech2/**/*.wav"
|
| 11 |
|
| 12 |
bash run.sh --stage 3 --stop_stage 3 --system_version centos \
|
| 13 |
--file_folder_name fsmn-vad-by-webrtcvad-nx2-dns3 \
|
| 14 |
--final_model_name fsmn-vad-by-webrtcvad-nx2-dns3 \
|
| 15 |
-
--
|
| 16 |
-
--
|
| 17 |
/data/tianxing/HuggingDatasets/nx_noise/data/speech/nx-speech2/**/*.wav"
|
| 18 |
|
| 19 |
|
|
@@ -89,10 +96,10 @@ $verbose && echo "system_version: ${system_version}"
|
|
| 89 |
$verbose && echo "file_folder_name: ${file_folder_name}"
|
| 90 |
|
| 91 |
if [ $system_version == "windows" ]; then
|
| 92 |
-
alias python3='D:/Users/tianx/PycharmProjects/virtualenv/
|
| 93 |
elif [ $system_version == "centos" ] || [ $system_version == "ubuntu" ]; then
|
| 94 |
-
#source /data/local/bin/
|
| 95 |
-
alias python3='/data/local/bin/
|
| 96 |
fi
|
| 97 |
|
| 98 |
|
|
|
|
| 2 |
|
| 3 |
: <<'END'
|
| 4 |
|
| 5 |
+
bash run.sh --stage 1 --stop_stage 1 --system_version windows \
|
| 6 |
+
--file_folder_name fsmn-vad-by-webrtcvad-nx2-dns3 \
|
| 7 |
+
--final_model_name fsmn-vad-by-webrtcvad-nx2-dns3 \
|
| 8 |
+
--noise_patterns "D:/Users/tianx/HuggingDatasets/nx_noise/data/noise/**/*.wav" \
|
| 9 |
+
--speech_patterns "D:/Users/tianx/HuggingDatasets/nx_noise/data/speech/**/*.wav"
|
| 10 |
+
|
| 11 |
+
|
| 12 |
bash run.sh --stage 1 --stop_stage 1 --system_version centos \
|
| 13 |
--file_folder_name fsmn-vad-by-webrtcvad-nx2-dns3 \
|
| 14 |
--final_model_name fsmn-vad-by-webrtcvad-nx2-dns3 \
|
| 15 |
+
--noise_patterns "/data/tianxing/HuggingDatasets/nx_noise/data/noise/**/*.wav" \
|
| 16 |
+
--speech_patterns "/data/tianxing/HuggingDatasets/nx_noise/data/speech/dns3-speech/**/*.wav \
|
| 17 |
/data/tianxing/HuggingDatasets/nx_noise/data/speech/nx-speech2/**/*.wav"
|
| 18 |
|
| 19 |
bash run.sh --stage 3 --stop_stage 3 --system_version centos \
|
| 20 |
--file_folder_name fsmn-vad-by-webrtcvad-nx2-dns3 \
|
| 21 |
--final_model_name fsmn-vad-by-webrtcvad-nx2-dns3 \
|
| 22 |
+
--noise_patterns "/data/tianxing/HuggingDatasets/nx_noise/data/noise/**/*.wav" \
|
| 23 |
+
--speech_patterns "/data/tianxing/HuggingDatasets/nx_noise/data/speech/dns3-speech/**/*.wav \
|
| 24 |
/data/tianxing/HuggingDatasets/nx_noise/data/speech/nx-speech2/**/*.wav"
|
| 25 |
|
| 26 |
|
|
|
|
| 96 |
$verbose && echo "file_folder_name: ${file_folder_name}"
|
| 97 |
|
| 98 |
if [ $system_version == "windows" ]; then
|
| 99 |
+
alias python3='D:/Users/tianx/PycharmProjects/virtualenv/cc_vad/Scripts/python.exe'
|
| 100 |
elif [ $system_version == "centos" ] || [ $system_version == "ubuntu" ]; then
|
| 101 |
+
#source /data/local/bin/cc_vad/bin/activate
|
| 102 |
+
alias python3='/data/local/bin/cc_vad/bin/python3'
|
| 103 |
fi
|
| 104 |
|
| 105 |
|
examples/fsmn_vad_by_webrtcvad/step_1_prepare_data.py
CHANGED
|
@@ -35,7 +35,7 @@ def get_args():
|
|
| 35 |
parser.add_argument("--valid_dataset", default="valid.jsonl", type=str)
|
| 36 |
|
| 37 |
parser.add_argument("--duration", default=8.0, type=float)
|
| 38 |
-
parser.add_argument("--min_speech_duration", default=
|
| 39 |
parser.add_argument("--max_speech_duration", default=8.0, type=float)
|
| 40 |
parser.add_argument("--min_snr_db", default=-10, type=float)
|
| 41 |
parser.add_argument("--max_snr_db", default=20, type=float)
|
|
@@ -56,7 +56,7 @@ def target_second_noise_signal_generator(filename_patterns: List[str],
|
|
| 56 |
|
| 57 |
for epoch_idx in range(max_epoch):
|
| 58 |
for filename_pattern in filename_patterns:
|
| 59 |
-
for filename in glob(filename_pattern):
|
| 60 |
signal, _ = librosa.load(filename, sr=sample_rate)
|
| 61 |
|
| 62 |
if signal.ndim != 1:
|
|
@@ -109,7 +109,7 @@ def target_second_speech_signal_generator(filename_patterns: List[str],
|
|
| 109 |
sample_rate: int = 8000, max_epoch: int = 1):
|
| 110 |
for epoch_idx in range(max_epoch):
|
| 111 |
for filename_pattern in filename_patterns:
|
| 112 |
-
for filename in glob(filename_pattern):
|
| 113 |
signal, _ = librosa.load(filename, sr=sample_rate)
|
| 114 |
raw_duration = librosa.get_duration(y=signal, sr=sample_rate)
|
| 115 |
|
|
|
|
| 35 |
parser.add_argument("--valid_dataset", default="valid.jsonl", type=str)
|
| 36 |
|
| 37 |
parser.add_argument("--duration", default=8.0, type=float)
|
| 38 |
+
parser.add_argument("--min_speech_duration", default=4.0, type=float)
|
| 39 |
parser.add_argument("--max_speech_duration", default=8.0, type=float)
|
| 40 |
parser.add_argument("--min_snr_db", default=-10, type=float)
|
| 41 |
parser.add_argument("--max_snr_db", default=20, type=float)
|
|
|
|
| 56 |
|
| 57 |
for epoch_idx in range(max_epoch):
|
| 58 |
for filename_pattern in filename_patterns:
|
| 59 |
+
for filename in glob(filename_pattern, recursive=True):
|
| 60 |
signal, _ = librosa.load(filename, sr=sample_rate)
|
| 61 |
|
| 62 |
if signal.ndim != 1:
|
|
|
|
| 109 |
sample_rate: int = 8000, max_epoch: int = 1):
|
| 110 |
for epoch_idx in range(max_epoch):
|
| 111 |
for filename_pattern in filename_patterns:
|
| 112 |
+
for filename in glob(filename_pattern, recursive=True):
|
| 113 |
signal, _ = librosa.load(filename, sr=sample_rate)
|
| 114 |
raw_duration = librosa.get_duration(y=signal, sr=sample_rate)
|
| 115 |
|