Skip to content

Commit

Permalink
Add new class that holds pinyin correction profile.
Browse files Browse the repository at this point in the history
  • Loading branch information
wengxt committed Mar 23, 2024
1 parent 11cbe0e commit b405964
Show file tree
Hide file tree
Showing 4 changed files with 179 additions and 3 deletions.
2 changes: 2 additions & 0 deletions src/libime/pinyin/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ set(LIBIME_PINYIN_HDRS
pinyinprediction.h
shuangpindata.h
shuangpinprofile.h
pinyincorrectionprofile.h
${CMAKE_CURRENT_BINARY_DIR}/libimepinyin_export.h
)

Expand All @@ -23,6 +24,7 @@ set(LIBIME_PINYIN_SRCS
pinyinmatchstate.cpp
shuangpinprofile.cpp
pinyinprediction.cpp
pinyincorrectionprofile.cpp
)

ecm_setup_version(PROJECT
Expand Down
96 changes: 96 additions & 0 deletions src/libime/pinyin/pinyincorrectionprofile.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
/*
* SPDX-FileCopyrightText: 2024-2024 CSSlayer <[email protected]>
*
* SPDX-License-Identifier: LGPL-2.1-or-later
*/
#include "pinyincorrectionprofile.h"
#include "pinyindata.h"
#include "pinyinencoder.h"

namespace libime {

namespace {

/*
 * Build a correction mapping from a list of keyboard rows.
 *
 * Every key in a row is mapped to the keys directly adjacent to it within
 * that same row: the left neighbour (if any) followed by the right neighbour
 * (if any). Keys in different rows are never treated as neighbours. If the
 * same key shows up more than once, the last occurrence wins.
 */
std::unordered_map<char, std::vector<char>>
mappingFromRows(const std::vector<std::string> &rows) {
    std::unordered_map<char, std::vector<char>> neighbours;
    for (const auto &keys : rows) {
        const size_t count = keys.size();
        for (size_t pos = 0; pos < count; ++pos) {
            // Overwrite whatever an earlier occurrence of this key stored.
            auto &adjacent = neighbours[keys[pos]];
            adjacent.clear();
            if (pos != 0) {
                adjacent.push_back(keys[pos - 1]);
            }
            if (pos + 1 != count) {
                adjacent.push_back(keys[pos + 1]);
            }
        }
    }
    return neighbours;
}

/*
 * Return the correction mapping for a built-in profile.
 * A value that matches no known profile yields an empty mapping.
 */
std::unordered_map<char, std::vector<char>>
getProfileMapping(BuiltinPinyinCorrectionProfile profile) {
    std::unordered_map<char, std::vector<char>> mapping;
    switch (profile) {
    case BuiltinPinyinCorrectionProfile::Qwerty:
        // The three letter rows of a QWERTY keyboard.
        mapping = mappingFromRows({"qwertyuiop", "asdfghjkl", "zxcvbnm"});
        break;
    }
    return mapping;
}
} // namespace

// Private implementation data of PinyinCorrectionProfile, kept out of the
// public header.
class PinyinCorrectionProfilePrivate {
public:
// Pinyin map held by the profile: the original entries plus the generated
// correction entries.
PinyinMap pinyinMap_;
};

// Built-in profile constructor: resolves the profile to its key mapping and
// delegates to the mapping-based constructor.
PinyinCorrectionProfile::PinyinCorrectionProfile(
BuiltinPinyinCorrectionProfile profile)
: PinyinCorrectionProfile(getProfileMapping(profile)) {}

// Mapping-based constructor.
//
// Starts from the map returned by getPinyinMapV2() and, for every entry and
// every character position whose character has substitutes in `mapping`, adds
// one extra entry per substitute. Each extra entry keeps the initial and final
// of the entry it was derived from and has PinyinFuzzyFlag::Correction added
// to its flags. An empty mapping leaves the map untouched.
PinyinCorrectionProfile::PinyinCorrectionProfile(
    const std::unordered_map<char, std::vector<char>> &mapping)
    : d_ptr(std::make_unique<PinyinCorrectionProfilePrivate>()) {
    FCITX_D();
    // Begin with the unmodified pinyin map.
    d->pinyinMap_ = getPinyinMapV2();
    if (mapping.empty()) {
        return;
    }

    // Gather the generated entries separately and insert them after the scan,
    // so the map is not modified while it is being iterated.
    std::vector<PinyinEntry> corrected;
    for (const auto &entry : d->pinyinMap_) {
        const auto &spelling = entry.pinyin();
        for (size_t pos = 0; pos < spelling.size(); ++pos) {
            const auto found = mapping.find(spelling[pos]);
            if (found != mapping.end() && !found->second.empty()) {
                // Scratch copy of the spelling; only position `pos` is varied.
                auto candidate = spelling;
                for (const auto substitute : found->second) {
                    candidate[pos] = substitute;
                    corrected.push_back(PinyinEntry(
                        candidate.data(), entry.initial(), entry.final(),
                        entry.flags() | PinyinFuzzyFlag::Correction));
                }
            }
        }
    }

    for (const auto &extra : corrected) {
        d->pinyinMap_.insert(extra);
    }
}

// Defined out of line in this file, where PinyinCorrectionProfilePrivate is a
// complete type, so the std::unique_ptr member can destroy it.
PinyinCorrectionProfile::~PinyinCorrectionProfile() = default;

// Read-only access to the pinyin map stored in the private data (filled in by
// the constructor). The reference refers to a member of the private object.
const PinyinMap &PinyinCorrectionProfile::pinyinMap() const {
// NOTE(review): FCITX_D() presumably introduces the private pointer `d` used
// below - confirm against fcitx-utils/macros.h.
FCITX_D();
return d->pinyinMap_;
}

} // namespace libime
75 changes: 75 additions & 0 deletions src/libime/pinyin/pinyincorrectionprofile.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
/*
* SPDX-FileCopyrightText: 2024-2024 CSSlayer <[email protected]>
*
* SPDX-License-Identifier: LGPL-2.1-or-later
*/
#ifndef _FCITX_LIBIME_PINYIN_PINYINCORRECTIONPROFILE_H_
#define _FCITX_LIBIME_PINYIN_PINYINCORRECTIONPROFILE_H_

#include "libimepinyin_export.h"
#include <fcitx-utils/macros.h>
#include <libime/pinyin/pinyindata.h>
#include <memory>
#include <unordered_map>
#include <vector>

namespace libime {

/**
 * Built-in pinyin correction profiles.
 *
 * Each value selects a predefined key mapping that can be passed to
 * PinyinCorrectionProfile.
 *
 * @since 1.1.7
 */
enum class BuiltinPinyinCorrectionProfile {
/**
 * Pinyin correction based on the QWERTY keyboard layout.
 */
Qwerty,
};

class PinyinCorrectionProfilePrivate;

/**
 * Class that holds a pinyin map extended with correction entries derived from
 * a key correction mapping.
 *
 * @since 1.1.7
 */
class LIBIMEPINYIN_EXPORT PinyinCorrectionProfile {
public:
/**
 * Construct the profile based on a built-in layout.
 *
 * @param profile built-in profile
 */
explicit PinyinCorrectionProfile(BuiltinPinyinCorrectionProfile profile);

/**
 * Construct the profile based on a customized mapping.
 *
 * The key of the mapping is the intended pinyin character and the value is
 * the list of keys that may be typed by mistake in its place. E.g. if w may
 * be mistyped as q or e, the mapping will contain {'w': ['q', 'e']}.
 *
 * @param mapping pinyin character and the corresponding possible wrong keys.
 */
explicit PinyinCorrectionProfile(
const std::unordered_map<char, std::vector<char>> &mapping);

virtual ~PinyinCorrectionProfile();

/**
 * Return the updated pinyin map.
 *
 * The map contains the original entries plus the generated correction
 * entries. New entries will be marked with PinyinFuzzyFlag::Correction.
 *
 * @see getPinyinMapV2
 */
const PinyinMap &pinyinMap() const;

private:
// NOTE(review): FCITX_DECLARE_PRIVATE presumably declares the accessors used
// by FCITX_D() - confirm against fcitx-utils/macros.h.
FCITX_DECLARE_PRIVATE(PinyinCorrectionProfile);
// Owning pointer to the private implementation object.
std::unique_ptr<PinyinCorrectionProfilePrivate> d_ptr;
};

} // namespace libime

#endif
9 changes: 6 additions & 3 deletions src/libime/pinyin/pinyinencoder.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,11 @@
#define _FCITX_LIBIME_PINYIN_PINYINENCODER_H_

#include "libimepinyin_export.h"
#include <cassert>
#include <fcitx-utils/flags.h>
#include <fcitx-utils/log.h>
#include <functional>
#include <libime/core/segmentgraph.h>
#include <string>
#include <string_view>
#include <unordered_map>
#include <vector>

namespace libime {
Expand Down Expand Up @@ -53,6 +50,12 @@ enum class PinyinFuzzyFlag {
* @since 1.1.3
*/
AdvancedTypo = 1 << 18,
/**
* Enable correction based on layout profile.
*
* @since 1.1.7
*/
Correction = 1 << 19,
};

using PinyinFuzzyFlags = fcitx::Flags<PinyinFuzzyFlag>;
Expand Down

0 comments on commit b405964

Please sign in to comment.