This commit is contained in:
commit
7a45ffb2fc
|
@ -0,0 +1,8 @@
|
|||
# Default ignored files
|
||||
/shelf/
|
||||
/workspace.xml
|
||||
# Editor-based HTTP Client requests
|
||||
/httpRequests/
|
||||
# Datasource local storage ignored files
|
||||
/dataSources/
|
||||
/dataSources.local.xml
|
|
@ -0,0 +1,9 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<module type="JAVA_MODULE" version="4">
|
||||
<component name="NewModuleRootManager" inherit-compiler-output="true">
|
||||
<exclude-output />
|
||||
<content url="file://$MODULE_DIR$" />
|
||||
<orderEntry type="inheritedJdk" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
</component>
|
||||
</module>
|
|
@ -0,0 +1,468 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="DBNavigator.Project.DataEditorManager">
|
||||
<record-view-column-sorting-type value="BY_INDEX" />
|
||||
<value-preview-text-wrapping value="true" />
|
||||
<value-preview-pinned value="false" />
|
||||
</component>
|
||||
<component name="DBNavigator.Project.DataExportManager">
|
||||
<export-instructions>
|
||||
<create-header value="true" />
|
||||
<friendly-headers value="false" />
|
||||
<quote-values-containing-separator value="true" />
|
||||
<quote-all-values value="false" />
|
||||
<value-separator value="" />
|
||||
<file-name value="" />
|
||||
<file-location value="" />
|
||||
<scope value="GLOBAL" />
|
||||
<destination value="FILE" />
|
||||
<format value="EXCEL" />
|
||||
<charset value="GBK" />
|
||||
<charset value="GBK" />
|
||||
</export-instructions>
|
||||
</component>
|
||||
<component name="DBNavigator.Project.DatabaseBrowserManager">
|
||||
<autoscroll-to-editor value="false" />
|
||||
<autoscroll-from-editor value="true" />
|
||||
<show-object-properties value="true" />
|
||||
<loaded-nodes />
|
||||
</component>
|
||||
<component name="DBNavigator.Project.DatabaseFileManager">
|
||||
<open-files />
|
||||
</component>
|
||||
<component name="DBNavigator.Project.EditorStateManager">
|
||||
<last-used-providers />
|
||||
</component>
|
||||
<component name="DBNavigator.Project.ExecutionManager">
|
||||
<retain-sticky-names value="false" />
|
||||
</component>
|
||||
<component name="DBNavigator.Project.MethodExecutionManager">
|
||||
<method-browser />
|
||||
<execution-history>
|
||||
<group-entries value="true" />
|
||||
<execution-inputs />
|
||||
</execution-history>
|
||||
<argument-values-cache />
|
||||
</component>
|
||||
<component name="DBNavigator.Project.ObjectDependencyManager">
|
||||
<last-used-dependency-type value="INCOMING" />
|
||||
</component>
|
||||
<component name="DBNavigator.Project.ObjectQuickFilterManager">
|
||||
<last-used-operator value="EQUAL" />
|
||||
<filters />
|
||||
</component>
|
||||
<component name="DBNavigator.Project.ParserDiagnosticsManager">
|
||||
<diagnostics-history />
|
||||
</component>
|
||||
<component name="DBNavigator.Project.ScriptExecutionManager" clear-outputs="true">
|
||||
<recently-used-interfaces />
|
||||
</component>
|
||||
<component name="DBNavigator.Project.Settings">
|
||||
<connections />
|
||||
<browser-settings>
|
||||
<general>
|
||||
<display-mode value="TABBED" />
|
||||
<navigation-history-size value="100" />
|
||||
<show-object-details value="false" />
|
||||
</general>
|
||||
<filters>
|
||||
<object-type-filter>
|
||||
<object-type name="SCHEMA" enabled="true" />
|
||||
<object-type name="USER" enabled="true" />
|
||||
<object-type name="ROLE" enabled="true" />
|
||||
<object-type name="PRIVILEGE" enabled="true" />
|
||||
<object-type name="CHARSET" enabled="true" />
|
||||
<object-type name="TABLE" enabled="true" />
|
||||
<object-type name="VIEW" enabled="true" />
|
||||
<object-type name="MATERIALIZED_VIEW" enabled="true" />
|
||||
<object-type name="NESTED_TABLE" enabled="true" />
|
||||
<object-type name="COLUMN" enabled="true" />
|
||||
<object-type name="INDEX" enabled="true" />
|
||||
<object-type name="CONSTRAINT" enabled="true" />
|
||||
<object-type name="DATASET_TRIGGER" enabled="true" />
|
||||
<object-type name="DATABASE_TRIGGER" enabled="true" />
|
||||
<object-type name="SYNONYM" enabled="true" />
|
||||
<object-type name="SEQUENCE" enabled="true" />
|
||||
<object-type name="PROCEDURE" enabled="true" />
|
||||
<object-type name="FUNCTION" enabled="true" />
|
||||
<object-type name="PACKAGE" enabled="true" />
|
||||
<object-type name="TYPE" enabled="true" />
|
||||
<object-type name="TYPE_ATTRIBUTE" enabled="true" />
|
||||
<object-type name="ARGUMENT" enabled="true" />
|
||||
<object-type name="DIMENSION" enabled="true" />
|
||||
<object-type name="CLUSTER" enabled="true" />
|
||||
<object-type name="DBLINK" enabled="true" />
|
||||
</object-type-filter>
|
||||
</filters>
|
||||
<sorting>
|
||||
<object-type name="COLUMN" sorting-type="NAME" />
|
||||
<object-type name="FUNCTION" sorting-type="NAME" />
|
||||
<object-type name="PROCEDURE" sorting-type="NAME" />
|
||||
<object-type name="ARGUMENT" sorting-type="POSITION" />
|
||||
</sorting>
|
||||
<default-editors>
|
||||
<object-type name="VIEW" editor-type="SELECTION" />
|
||||
<object-type name="PACKAGE" editor-type="SELECTION" />
|
||||
<object-type name="TYPE" editor-type="SELECTION" />
|
||||
</default-editors>
|
||||
</browser-settings>
|
||||
<navigation-settings>
|
||||
<lookup-filters>
|
||||
<lookup-objects>
|
||||
<object-type name="SCHEMA" enabled="true" />
|
||||
<object-type name="USER" enabled="false" />
|
||||
<object-type name="ROLE" enabled="false" />
|
||||
<object-type name="PRIVILEGE" enabled="false" />
|
||||
<object-type name="CHARSET" enabled="false" />
|
||||
<object-type name="TABLE" enabled="true" />
|
||||
<object-type name="VIEW" enabled="true" />
|
||||
<object-type name="MATERIALIZED VIEW" enabled="true" />
|
||||
<object-type name="INDEX" enabled="true" />
|
||||
<object-type name="CONSTRAINT" enabled="true" />
|
||||
<object-type name="DATASET TRIGGER" enabled="true" />
|
||||
<object-type name="DATABASE TRIGGER" enabled="true" />
|
||||
<object-type name="SYNONYM" enabled="false" />
|
||||
<object-type name="SEQUENCE" enabled="true" />
|
||||
<object-type name="PROCEDURE" enabled="true" />
|
||||
<object-type name="FUNCTION" enabled="true" />
|
||||
<object-type name="PACKAGE" enabled="true" />
|
||||
<object-type name="TYPE" enabled="true" />
|
||||
<object-type name="DIMENSION" enabled="false" />
|
||||
<object-type name="CLUSTER" enabled="false" />
|
||||
<object-type name="DBLINK" enabled="true" />
|
||||
</lookup-objects>
|
||||
<force-database-load value="false" />
|
||||
<prompt-connection-selection value="true" />
|
||||
<prompt-schema-selection value="true" />
|
||||
</lookup-filters>
|
||||
</navigation-settings>
|
||||
<dataset-grid-settings>
|
||||
<general>
|
||||
<enable-zooming value="true" />
|
||||
<enable-column-tooltip value="true" />
|
||||
</general>
|
||||
<sorting>
|
||||
<nulls-first value="true" />
|
||||
<max-sorting-columns value="4" />
|
||||
</sorting>
|
||||
<audit-columns>
|
||||
<column-names value="" />
|
||||
<visible value="true" />
|
||||
<editable value="false" />
|
||||
</audit-columns>
|
||||
</dataset-grid-settings>
|
||||
<dataset-editor-settings>
|
||||
<text-editor-popup>
|
||||
<active value="false" />
|
||||
<active-if-empty value="false" />
|
||||
<data-length-threshold value="100" />
|
||||
<popup-delay value="1000" />
|
||||
</text-editor-popup>
|
||||
<values-actions-popup>
|
||||
<show-popup-button value="true" />
|
||||
<element-count-threshold value="1000" />
|
||||
<data-length-threshold value="250" />
|
||||
</values-actions-popup>
|
||||
<general>
|
||||
<fetch-block-size value="100" />
|
||||
<fetch-timeout value="30" />
|
||||
<trim-whitespaces value="true" />
|
||||
<convert-empty-strings-to-null value="true" />
|
||||
<select-content-on-cell-edit value="true" />
|
||||
<large-value-preview-active value="true" />
|
||||
</general>
|
||||
<filters>
|
||||
<prompt-filter-dialog value="true" />
|
||||
<default-filter-type value="BASIC" />
|
||||
</filters>
|
||||
<qualified-text-editor text-length-threshold="300">
|
||||
<content-types>
|
||||
<content-type name="Text" enabled="true" />
|
||||
<content-type name="Properties" enabled="true" />
|
||||
<content-type name="XML" enabled="true" />
|
||||
<content-type name="DTD" enabled="true" />
|
||||
<content-type name="HTML" enabled="true" />
|
||||
<content-type name="XHTML" enabled="true" />
|
||||
<content-type name="CSS" enabled="true" />
|
||||
<content-type name="Java" enabled="true" />
|
||||
<content-type name="SQL" enabled="true" />
|
||||
<content-type name="PL/SQL" enabled="true" />
|
||||
<content-type name="JavaScript" enabled="true" />
|
||||
<content-type name="JSON" enabled="true" />
|
||||
<content-type name="JSON5" enabled="true" />
|
||||
<content-type name="JSP" enabled="true" />
|
||||
<content-type name="JSPx" enabled="true" />
|
||||
<content-type name="Groovy" enabled="true" />
|
||||
<content-type name="FTL" enabled="true" />
|
||||
<content-type name="VTL" enabled="true" />
|
||||
<content-type name="AIDL" enabled="true" />
|
||||
<content-type name="YAML" enabled="true" />
|
||||
<content-type name="Manifest" enabled="true" />
|
||||
</content-types>
|
||||
</qualified-text-editor>
|
||||
<record-navigation>
|
||||
<navigation-target value="VIEWER" />
|
||||
</record-navigation>
|
||||
</dataset-editor-settings>
|
||||
<code-editor-settings>
|
||||
<general>
|
||||
<show-object-navigation-gutter value="false" />
|
||||
<show-spec-declaration-navigation-gutter value="true" />
|
||||
<enable-spellchecking value="true" />
|
||||
<enable-reference-spellchecking value="false" />
|
||||
</general>
|
||||
<confirmations>
|
||||
<save-changes value="false" />
|
||||
<revert-changes value="true" />
|
||||
</confirmations>
|
||||
</code-editor-settings>
|
||||
<code-completion-settings>
|
||||
<filters>
|
||||
<basic-filter>
|
||||
<filter-element type="RESERVED_WORD" id="keyword" selected="true" />
|
||||
<filter-element type="RESERVED_WORD" id="function" selected="true" />
|
||||
<filter-element type="RESERVED_WORD" id="parameter" selected="true" />
|
||||
<filter-element type="RESERVED_WORD" id="datatype" selected="true" />
|
||||
<filter-element type="RESERVED_WORD" id="exception" selected="true" />
|
||||
<filter-element type="OBJECT" id="schema" selected="true" />
|
||||
<filter-element type="OBJECT" id="role" selected="true" />
|
||||
<filter-element type="OBJECT" id="user" selected="true" />
|
||||
<filter-element type="OBJECT" id="privilege" selected="true" />
|
||||
<user-schema>
|
||||
<filter-element type="OBJECT" id="table" selected="true" />
|
||||
<filter-element type="OBJECT" id="view" selected="true" />
|
||||
<filter-element type="OBJECT" id="materialized view" selected="true" />
|
||||
<filter-element type="OBJECT" id="index" selected="true" />
|
||||
<filter-element type="OBJECT" id="constraint" selected="true" />
|
||||
<filter-element type="OBJECT" id="trigger" selected="true" />
|
||||
<filter-element type="OBJECT" id="synonym" selected="false" />
|
||||
<filter-element type="OBJECT" id="sequence" selected="true" />
|
||||
<filter-element type="OBJECT" id="procedure" selected="true" />
|
||||
<filter-element type="OBJECT" id="function" selected="true" />
|
||||
<filter-element type="OBJECT" id="package" selected="true" />
|
||||
<filter-element type="OBJECT" id="type" selected="true" />
|
||||
<filter-element type="OBJECT" id="dimension" selected="true" />
|
||||
<filter-element type="OBJECT" id="cluster" selected="true" />
|
||||
<filter-element type="OBJECT" id="dblink" selected="true" />
|
||||
</user-schema>
|
||||
<public-schema>
|
||||
<filter-element type="OBJECT" id="table" selected="false" />
|
||||
<filter-element type="OBJECT" id="view" selected="false" />
|
||||
<filter-element type="OBJECT" id="materialized view" selected="false" />
|
||||
<filter-element type="OBJECT" id="index" selected="false" />
|
||||
<filter-element type="OBJECT" id="constraint" selected="false" />
|
||||
<filter-element type="OBJECT" id="trigger" selected="false" />
|
||||
<filter-element type="OBJECT" id="synonym" selected="false" />
|
||||
<filter-element type="OBJECT" id="sequence" selected="false" />
|
||||
<filter-element type="OBJECT" id="procedure" selected="false" />
|
||||
<filter-element type="OBJECT" id="function" selected="false" />
|
||||
<filter-element type="OBJECT" id="package" selected="false" />
|
||||
<filter-element type="OBJECT" id="type" selected="false" />
|
||||
<filter-element type="OBJECT" id="dimension" selected="false" />
|
||||
<filter-element type="OBJECT" id="cluster" selected="false" />
|
||||
<filter-element type="OBJECT" id="dblink" selected="false" />
|
||||
</public-schema>
|
||||
<any-schema>
|
||||
<filter-element type="OBJECT" id="table" selected="true" />
|
||||
<filter-element type="OBJECT" id="view" selected="true" />
|
||||
<filter-element type="OBJECT" id="materialized view" selected="true" />
|
||||
<filter-element type="OBJECT" id="index" selected="true" />
|
||||
<filter-element type="OBJECT" id="constraint" selected="true" />
|
||||
<filter-element type="OBJECT" id="trigger" selected="true" />
|
||||
<filter-element type="OBJECT" id="synonym" selected="true" />
|
||||
<filter-element type="OBJECT" id="sequence" selected="true" />
|
||||
<filter-element type="OBJECT" id="procedure" selected="true" />
|
||||
<filter-element type="OBJECT" id="function" selected="true" />
|
||||
<filter-element type="OBJECT" id="package" selected="true" />
|
||||
<filter-element type="OBJECT" id="type" selected="true" />
|
||||
<filter-element type="OBJECT" id="dimension" selected="true" />
|
||||
<filter-element type="OBJECT" id="cluster" selected="true" />
|
||||
<filter-element type="OBJECT" id="dblink" selected="true" />
|
||||
</any-schema>
|
||||
</basic-filter>
|
||||
<extended-filter>
|
||||
<filter-element type="RESERVED_WORD" id="keyword" selected="true" />
|
||||
<filter-element type="RESERVED_WORD" id="function" selected="true" />
|
||||
<filter-element type="RESERVED_WORD" id="parameter" selected="true" />
|
||||
<filter-element type="RESERVED_WORD" id="datatype" selected="true" />
|
||||
<filter-element type="RESERVED_WORD" id="exception" selected="true" />
|
||||
<filter-element type="OBJECT" id="schema" selected="true" />
|
||||
<filter-element type="OBJECT" id="user" selected="true" />
|
||||
<filter-element type="OBJECT" id="role" selected="true" />
|
||||
<filter-element type="OBJECT" id="privilege" selected="true" />
|
||||
<user-schema>
|
||||
<filter-element type="OBJECT" id="table" selected="true" />
|
||||
<filter-element type="OBJECT" id="view" selected="true" />
|
||||
<filter-element type="OBJECT" id="materialized view" selected="true" />
|
||||
<filter-element type="OBJECT" id="index" selected="true" />
|
||||
<filter-element type="OBJECT" id="constraint" selected="true" />
|
||||
<filter-element type="OBJECT" id="trigger" selected="true" />
|
||||
<filter-element type="OBJECT" id="synonym" selected="true" />
|
||||
<filter-element type="OBJECT" id="sequence" selected="true" />
|
||||
<filter-element type="OBJECT" id="procedure" selected="true" />
|
||||
<filter-element type="OBJECT" id="function" selected="true" />
|
||||
<filter-element type="OBJECT" id="package" selected="true" />
|
||||
<filter-element type="OBJECT" id="type" selected="true" />
|
||||
<filter-element type="OBJECT" id="dimension" selected="true" />
|
||||
<filter-element type="OBJECT" id="cluster" selected="true" />
|
||||
<filter-element type="OBJECT" id="dblink" selected="true" />
|
||||
</user-schema>
|
||||
<public-schema>
|
||||
<filter-element type="OBJECT" id="table" selected="true" />
|
||||
<filter-element type="OBJECT" id="view" selected="true" />
|
||||
<filter-element type="OBJECT" id="materialized view" selected="true" />
|
||||
<filter-element type="OBJECT" id="index" selected="true" />
|
||||
<filter-element type="OBJECT" id="constraint" selected="true" />
|
||||
<filter-element type="OBJECT" id="trigger" selected="true" />
|
||||
<filter-element type="OBJECT" id="synonym" selected="true" />
|
||||
<filter-element type="OBJECT" id="sequence" selected="true" />
|
||||
<filter-element type="OBJECT" id="procedure" selected="true" />
|
||||
<filter-element type="OBJECT" id="function" selected="true" />
|
||||
<filter-element type="OBJECT" id="package" selected="true" />
|
||||
<filter-element type="OBJECT" id="type" selected="true" />
|
||||
<filter-element type="OBJECT" id="dimension" selected="true" />
|
||||
<filter-element type="OBJECT" id="cluster" selected="true" />
|
||||
<filter-element type="OBJECT" id="dblink" selected="true" />
|
||||
</public-schema>
|
||||
<any-schema>
|
||||
<filter-element type="OBJECT" id="table" selected="true" />
|
||||
<filter-element type="OBJECT" id="view" selected="true" />
|
||||
<filter-element type="OBJECT" id="materialized view" selected="true" />
|
||||
<filter-element type="OBJECT" id="index" selected="true" />
|
||||
<filter-element type="OBJECT" id="constraint" selected="true" />
|
||||
<filter-element type="OBJECT" id="trigger" selected="true" />
|
||||
<filter-element type="OBJECT" id="synonym" selected="true" />
|
||||
<filter-element type="OBJECT" id="sequence" selected="true" />
|
||||
<filter-element type="OBJECT" id="procedure" selected="true" />
|
||||
<filter-element type="OBJECT" id="function" selected="true" />
|
||||
<filter-element type="OBJECT" id="package" selected="true" />
|
||||
<filter-element type="OBJECT" id="type" selected="true" />
|
||||
<filter-element type="OBJECT" id="dimension" selected="true" />
|
||||
<filter-element type="OBJECT" id="cluster" selected="true" />
|
||||
<filter-element type="OBJECT" id="dblink" selected="true" />
|
||||
</any-schema>
|
||||
</extended-filter>
|
||||
</filters>
|
||||
<sorting enabled="true">
|
||||
<sorting-element type="RESERVED_WORD" id="keyword" />
|
||||
<sorting-element type="RESERVED_WORD" id="datatype" />
|
||||
<sorting-element type="OBJECT" id="column" />
|
||||
<sorting-element type="OBJECT" id="table" />
|
||||
<sorting-element type="OBJECT" id="view" />
|
||||
<sorting-element type="OBJECT" id="materialized view" />
|
||||
<sorting-element type="OBJECT" id="index" />
|
||||
<sorting-element type="OBJECT" id="constraint" />
|
||||
<sorting-element type="OBJECT" id="trigger" />
|
||||
<sorting-element type="OBJECT" id="synonym" />
|
||||
<sorting-element type="OBJECT" id="sequence" />
|
||||
<sorting-element type="OBJECT" id="procedure" />
|
||||
<sorting-element type="OBJECT" id="function" />
|
||||
<sorting-element type="OBJECT" id="package" />
|
||||
<sorting-element type="OBJECT" id="type" />
|
||||
<sorting-element type="OBJECT" id="dimension" />
|
||||
<sorting-element type="OBJECT" id="cluster" />
|
||||
<sorting-element type="OBJECT" id="dblink" />
|
||||
<sorting-element type="OBJECT" id="schema" />
|
||||
<sorting-element type="OBJECT" id="role" />
|
||||
<sorting-element type="OBJECT" id="user" />
|
||||
<sorting-element type="RESERVED_WORD" id="function" />
|
||||
<sorting-element type="RESERVED_WORD" id="parameter" />
|
||||
</sorting>
|
||||
<format>
|
||||
<enforce-code-style-case value="true" />
|
||||
</format>
|
||||
</code-completion-settings>
|
||||
<execution-engine-settings>
|
||||
<statement-execution>
|
||||
<fetch-block-size value="100" />
|
||||
<execution-timeout value="20" />
|
||||
<debug-execution-timeout value="600" />
|
||||
<focus-result value="false" />
|
||||
<prompt-execution value="false" />
|
||||
</statement-execution>
|
||||
<script-execution>
|
||||
<command-line-interfaces />
|
||||
<execution-timeout value="300" />
|
||||
</script-execution>
|
||||
<method-execution>
|
||||
<execution-timeout value="30" />
|
||||
<debug-execution-timeout value="600" />
|
||||
<parameter-history-size value="10" />
|
||||
</method-execution>
|
||||
</execution-engine-settings>
|
||||
<operation-settings>
|
||||
<transactions>
|
||||
<uncommitted-changes>
|
||||
<on-project-close value="ASK" />
|
||||
<on-disconnect value="ASK" />
|
||||
<on-autocommit-toggle value="ASK" />
|
||||
</uncommitted-changes>
|
||||
<multiple-uncommitted-changes>
|
||||
<on-commit value="ASK" />
|
||||
<on-rollback value="ASK" />
|
||||
</multiple-uncommitted-changes>
|
||||
</transactions>
|
||||
<session-browser>
|
||||
<disconnect-session value="ASK" />
|
||||
<kill-session value="ASK" />
|
||||
<reload-on-filter-change value="false" />
|
||||
</session-browser>
|
||||
<compiler>
|
||||
<compile-type value="KEEP" />
|
||||
<compile-dependencies value="ASK" />
|
||||
<always-show-controls value="false" />
|
||||
</compiler>
|
||||
<debugger>
|
||||
<debugger-type value="ASK" />
|
||||
<use-generic-runners value="true" />
|
||||
</debugger>
|
||||
</operation-settings>
|
||||
<ddl-file-settings>
|
||||
<extensions>
|
||||
<mapping file-type-id="VIEW" extensions="vw" />
|
||||
<mapping file-type-id="TRIGGER" extensions="trg" />
|
||||
<mapping file-type-id="PROCEDURE" extensions="prc" />
|
||||
<mapping file-type-id="FUNCTION" extensions="fnc" />
|
||||
<mapping file-type-id="PACKAGE" extensions="pkg" />
|
||||
<mapping file-type-id="PACKAGE_SPEC" extensions="pks" />
|
||||
<mapping file-type-id="PACKAGE_BODY" extensions="pkb" />
|
||||
<mapping file-type-id="TYPE" extensions="tpe" />
|
||||
<mapping file-type-id="TYPE_SPEC" extensions="tps" />
|
||||
<mapping file-type-id="TYPE_BODY" extensions="tpb" />
|
||||
</extensions>
|
||||
<general>
|
||||
<lookup-ddl-files value="true" />
|
||||
<create-ddl-files value="false" />
|
||||
<synchronize-ddl-files value="true" />
|
||||
<use-qualified-names value="false" />
|
||||
<make-scripts-rerunnable value="true" />
|
||||
</general>
|
||||
</ddl-file-settings>
|
||||
<general-settings>
|
||||
<regional-settings>
|
||||
<date-format value="MEDIUM" />
|
||||
<number-format value="UNGROUPED" />
|
||||
<locale value="SYSTEM_DEFAULT" />
|
||||
<use-custom-formats value="false" />
|
||||
</regional-settings>
|
||||
<environment>
|
||||
<environment-types>
|
||||
<environment-type id="development" name="Development" description="Development environment" color="-2430209/-12296320" readonly-code="false" readonly-data="false" />
|
||||
<environment-type id="integration" name="Integration" description="Integration environment" color="-2621494/-12163514" readonly-code="true" readonly-data="false" />
|
||||
<environment-type id="production" name="Production" description="Productive environment" color="-11574/-10271420" readonly-code="true" readonly-data="true" />
|
||||
<environment-type id="other" name="Other" description="" color="-1576/-10724543" readonly-code="false" readonly-data="false" />
|
||||
</environment-types>
|
||||
<visibility-settings>
|
||||
<connection-tabs value="true" />
|
||||
<dialog-headers value="true" />
|
||||
<object-editor-tabs value="true" />
|
||||
<script-editor-tabs value="false" />
|
||||
<execution-result-tabs value="true" />
|
||||
</visibility-settings>
|
||||
</environment>
|
||||
</general-settings>
|
||||
</component>
|
||||
<component name="DBNavigator.Project.StatementExecutionManager">
|
||||
<execution-variables />
|
||||
</component>
|
||||
</project>
|
|
@ -0,0 +1,270 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="EasyCodeTableSetting">
|
||||
<option name="tableInfoMap">
|
||||
<map>
|
||||
<entry key="g3fo_base.m_market_api_info">
|
||||
<value>
|
||||
<TableInfoDTO>
|
||||
<option name="fullColumn">
|
||||
<list>
|
||||
<ColumnInfoDTO>
|
||||
<option name="comment" value="ID" />
|
||||
<option name="custom" value="false" />
|
||||
<option name="ext" value="{}" />
|
||||
<option name="name" value="id" />
|
||||
<option name="type" value="java.lang.Long" />
|
||||
</ColumnInfoDTO>
|
||||
<ColumnInfoDTO>
|
||||
<option name="custom" value="false" />
|
||||
<option name="ext" value="{}" />
|
||||
<option name="name" value="marketApiCode" />
|
||||
<option name="type" value="java.lang.String" />
|
||||
</ColumnInfoDTO>
|
||||
<ColumnInfoDTO>
|
||||
<option name="custom" value="false" />
|
||||
<option name="ext" value="{}" />
|
||||
<option name="name" value="normalMarketFlag" />
|
||||
<option name="type" value="java.lang.Integer" />
|
||||
</ColumnInfoDTO>
|
||||
<ColumnInfoDTO>
|
||||
<option name="custom" value="false" />
|
||||
<option name="ext" value="{}" />
|
||||
<option name="name" value="greyMarketFlag" />
|
||||
<option name="type" value="java.lang.Integer" />
|
||||
</ColumnInfoDTO>
|
||||
<ColumnInfoDTO>
|
||||
<option name="custom" value="false" />
|
||||
<option name="ext" value="{}" />
|
||||
<option name="name" value="marketApiDescription" />
|
||||
<option name="type" value="java.lang.String" />
|
||||
</ColumnInfoDTO>
|
||||
<ColumnInfoDTO>
|
||||
<option name="custom" value="false" />
|
||||
<option name="ext" value="{}" />
|
||||
<option name="name" value="brokerList" />
|
||||
<option name="type" value="java.lang.String" />
|
||||
</ColumnInfoDTO>
|
||||
<ColumnInfoDTO>
|
||||
<option name="custom" value="false" />
|
||||
<option name="ext" value="{}" />
|
||||
<option name="name" value="rsakey" />
|
||||
<option name="type" value="java.lang.String" />
|
||||
</ColumnInfoDTO>
|
||||
<ColumnInfoDTO>
|
||||
<option name="custom" value="false" />
|
||||
<option name="ext" value="{}" />
|
||||
<option name="name" value="password" />
|
||||
<option name="type" value="java.lang.String" />
|
||||
</ColumnInfoDTO>
|
||||
<ColumnInfoDTO>
|
||||
<option name="custom" value="false" />
|
||||
<option name="ext" value="{}" />
|
||||
<option name="name" value="newPassword" />
|
||||
<option name="type" value="java.lang.String" />
|
||||
</ColumnInfoDTO>
|
||||
<ColumnInfoDTO>
|
||||
<option name="custom" value="false" />
|
||||
<option name="ext" value="{}" />
|
||||
<option name="name" value="remark" />
|
||||
<option name="type" value="java.lang.String" />
|
||||
</ColumnInfoDTO>
|
||||
<ColumnInfoDTO>
|
||||
<option name="custom" value="false" />
|
||||
<option name="ext" value="{}" />
|
||||
<option name="name" value="createOn" />
|
||||
<option name="type" value="java.util.Date" />
|
||||
</ColumnInfoDTO>
|
||||
<ColumnInfoDTO>
|
||||
<option name="custom" value="false" />
|
||||
<option name="ext" value="{}" />
|
||||
<option name="name" value="createBy" />
|
||||
<option name="type" value="java.lang.String" />
|
||||
</ColumnInfoDTO>
|
||||
<ColumnInfoDTO>
|
||||
<option name="custom" value="false" />
|
||||
<option name="ext" value="{}" />
|
||||
<option name="name" value="updateOn" />
|
||||
<option name="type" value="java.util.Date" />
|
||||
</ColumnInfoDTO>
|
||||
<ColumnInfoDTO>
|
||||
<option name="custom" value="false" />
|
||||
<option name="ext" value="{}" />
|
||||
<option name="name" value="updateBy" />
|
||||
<option name="type" value="java.lang.String" />
|
||||
</ColumnInfoDTO>
|
||||
</list>
|
||||
</option>
|
||||
<option name="name" value="MMarketApiInfo" />
|
||||
<option name="preName" value="" />
|
||||
<option name="saveModelName" value="" />
|
||||
<option name="savePackageName" value="" />
|
||||
<option name="savePath" value="" />
|
||||
<option name="templateGroupName" value="" />
|
||||
</TableInfoDTO>
|
||||
</value>
|
||||
</entry>
|
||||
<entry key="g3fo_user.m_user_info">
|
||||
<value>
|
||||
<TableInfoDTO>
|
||||
<option name="fullColumn">
|
||||
<list>
|
||||
<ColumnInfoDTO>
|
||||
<option name="custom" value="false" />
|
||||
<option name="ext" value="{}" />
|
||||
<option name="name" value="userId" />
|
||||
<option name="type" value="java.lang.Long" />
|
||||
</ColumnInfoDTO>
|
||||
<ColumnInfoDTO>
|
||||
<option name="custom" value="false" />
|
||||
<option name="ext" value="{}" />
|
||||
<option name="name" value="userName" />
|
||||
<option name="type" value="java.lang.String" />
|
||||
</ColumnInfoDTO>
|
||||
<ColumnInfoDTO>
|
||||
<option name="custom" value="false" />
|
||||
<option name="ext" value="{}" />
|
||||
<option name="name" value="localName" />
|
||||
<option name="type" value="java.lang.String" />
|
||||
</ColumnInfoDTO>
|
||||
<ColumnInfoDTO>
|
||||
<option name="custom" value="false" />
|
||||
<option name="ext" value="{}" />
|
||||
<option name="name" value="engName" />
|
||||
<option name="type" value="java.lang.String" />
|
||||
</ColumnInfoDTO>
|
||||
<ColumnInfoDTO>
|
||||
<option name="custom" value="false" />
|
||||
<option name="ext" value="{}" />
|
||||
<option name="name" value="userType" />
|
||||
<option name="type" value="java.lang.Integer" />
|
||||
</ColumnInfoDTO>
|
||||
<ColumnInfoDTO>
|
||||
<option name="custom" value="false" />
|
||||
<option name="ext" value="{}" />
|
||||
<option name="name" value="userStatus" />
|
||||
<option name="type" value="java.lang.Integer" />
|
||||
</ColumnInfoDTO>
|
||||
<ColumnInfoDTO>
|
||||
<option name="custom" value="false" />
|
||||
<option name="ext" value="{}" />
|
||||
<option name="name" value="title" />
|
||||
<option name="type" value="java.lang.String" />
|
||||
</ColumnInfoDTO>
|
||||
<ColumnInfoDTO>
|
||||
<option name="custom" value="false" />
|
||||
<option name="ext" value="{}" />
|
||||
<option name="name" value="branch" />
|
||||
<option name="type" value="java.lang.Integer" />
|
||||
</ColumnInfoDTO>
|
||||
<ColumnInfoDTO>
|
||||
<option name="custom" value="false" />
|
||||
<option name="ext" value="{}" />
|
||||
<option name="name" value="department" />
|
||||
<option name="type" value="java.lang.Integer" />
|
||||
</ColumnInfoDTO>
|
||||
<ColumnInfoDTO>
|
||||
<option name="custom" value="false" />
|
||||
<option name="ext" value="{}" />
|
||||
<option name="name" value="mobileNo" />
|
||||
<option name="type" value="java.lang.String" />
|
||||
</ColumnInfoDTO>
|
||||
<ColumnInfoDTO>
|
||||
<option name="custom" value="false" />
|
||||
<option name="ext" value="{}" />
|
||||
<option name="name" value="email" />
|
||||
<option name="type" value="java.lang.String" />
|
||||
</ColumnInfoDTO>
|
||||
<ColumnInfoDTO>
|
||||
<option name="custom" value="false" />
|
||||
<option name="ext" value="{}" />
|
||||
<option name="name" value="expiryDate" />
|
||||
<option name="type" value="java.util.Date" />
|
||||
</ColumnInfoDTO>
|
||||
<ColumnInfoDTO>
|
||||
<option name="custom" value="false" />
|
||||
<option name="ext" value="{}" />
|
||||
<option name="name" value="createBy" />
|
||||
<option name="type" value="java.lang.String" />
|
||||
</ColumnInfoDTO>
|
||||
<ColumnInfoDTO>
|
||||
<option name="custom" value="false" />
|
||||
<option name="ext" value="{}" />
|
||||
<option name="name" value="createOn" />
|
||||
<option name="type" value="java.util.Date" />
|
||||
</ColumnInfoDTO>
|
||||
<ColumnInfoDTO>
|
||||
<option name="custom" value="false" />
|
||||
<option name="ext" value="{}" />
|
||||
<option name="name" value="updateBy" />
|
||||
<option name="type" value="java.lang.String" />
|
||||
</ColumnInfoDTO>
|
||||
<ColumnInfoDTO>
|
||||
<option name="custom" value="false" />
|
||||
<option name="ext" value="{}" />
|
||||
<option name="name" value="updateOn" />
|
||||
<option name="type" value="java.util.Date" />
|
||||
</ColumnInfoDTO>
|
||||
<ColumnInfoDTO>
|
||||
<option name="custom" value="false" />
|
||||
<option name="ext" value="{}" />
|
||||
<option name="name" value="tradeOddlotFlg" />
|
||||
<option name="type" value="java.lang.Integer" />
|
||||
</ColumnInfoDTO>
|
||||
<ColumnInfoDTO>
|
||||
<option name="custom" value="false" />
|
||||
<option name="ext" value="{}" />
|
||||
<option name="name" value="maxClientPct" />
|
||||
<option name="type" value="java.lang.Float" />
|
||||
</ColumnInfoDTO>
|
||||
<ColumnInfoDTO>
|
||||
<option name="custom" value="false" />
|
||||
<option name="ext" value="{}" />
|
||||
<option name="name" value="maxDeductPct" />
|
||||
<option name="type" value="java.lang.Float" />
|
||||
</ColumnInfoDTO>
|
||||
<ColumnInfoDTO>
|
||||
<option name="custom" value="false" />
|
||||
<option name="ext" value="{}" />
|
||||
<option name="name" value="cnySingleOrderLimit" />
|
||||
<option name="type" value="java.lang.Float" />
|
||||
</ColumnInfoDTO>
|
||||
<ColumnInfoDTO>
|
||||
<option name="custom" value="false" />
|
||||
<option name="ext" value="{}" />
|
||||
<option name="name" value="cnySingleOverrideLimit" />
|
||||
<option name="type" value="java.lang.Float" />
|
||||
</ColumnInfoDTO>
|
||||
<ColumnInfoDTO>
|
||||
<option name="custom" value="false" />
|
||||
<option name="ext" value="{}" />
|
||||
<option name="name" value="otpLogon" />
|
||||
<option name="type" value="java.lang.Integer" />
|
||||
</ColumnInfoDTO>
|
||||
<ColumnInfoDTO>
|
||||
<option name="custom" value="false" />
|
||||
<option name="ext" value="{}" />
|
||||
<option name="name" value="otpChgpwd" />
|
||||
<option name="type" value="java.lang.Integer" />
|
||||
</ColumnInfoDTO>
|
||||
<ColumnInfoDTO>
|
||||
<option name="custom" value="false" />
|
||||
<option name="ext" value="{}" />
|
||||
<option name="name" value="otpMethod" />
|
||||
<option name="type" value="java.lang.Integer" />
|
||||
</ColumnInfoDTO>
|
||||
</list>
|
||||
</option>
|
||||
<option name="name" value="MUserInfo" />
|
||||
<option name="preName" value="" />
|
||||
<option name="saveModelName" value="" />
|
||||
<option name="savePackageName" value="" />
|
||||
<option name="savePath" value="" />
|
||||
<option name="templateGroupName" value="" />
|
||||
</TableInfoDTO>
|
||||
</value>
|
||||
</entry>
|
||||
</map>
|
||||
</option>
|
||||
</component>
|
||||
</project>
|
|
@ -0,0 +1,6 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="ProjectRootManager" version="2" languageLevel="JDK_19" project-jdk-name="Python 3.11 (untitled)" project-jdk-type="Python SDK">
|
||||
<output url="file://$PROJECT_DIR$/out" />
|
||||
</component>
|
||||
</project>
|
|
@ -0,0 +1,8 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="ProjectModuleManager">
|
||||
<modules>
|
||||
<module fileurl="file://$PROJECT_DIR$/.idea/ChatGPT_Similarity.iml" filepath="$PROJECT_DIR$/.idea/ChatGPT_Similarity.iml" />
|
||||
</modules>
|
||||
</component>
|
||||
</project>
|
|
@ -0,0 +1,6 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="VcsDirectoryMappings">
|
||||
<mapping directory="" vcs="Git" />
|
||||
</component>
|
||||
</project>
|
|
@ -0,0 +1,6 @@
|
|||
├─src
|
||||
│ └─__pycache__
|
||||
└─text
|
||||
|
||||
./text --- text files should be put here
|
||||
./src/trial --- a usage example
|
|
@ -0,0 +1,107 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from utils import *"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"654\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"directory = '../text'\n",
|
||||
"\n",
|
||||
"file_list = []\n",
|
||||
"for filename in os.listdir(directory):\n",
|
||||
" if os.path.isfile(os.path.join(directory, filename)):\n",
|
||||
" file_list.append(filename)\n",
|
||||
"\n",
|
||||
"# all the news titles\n",
|
||||
"print(len(file_list))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"for i in range(0, 50): # first 50 news title embeddings\n",
|
||||
" title_extracted = file_list[i].replace(\".html\", \"\")\n",
|
||||
" text = convert_to_simplified(title_extracted) # change to simplified chinese\n",
|
||||
" text = rmCharacters(text, punct=True, stop=True) # remove stopping words\n",
|
||||
" vector = embeddingfunc(client, text, model=\"text-embedding-3-small\", dimensions=1536) # get embedding\n",
|
||||
" save2text(file_list[i], vector) # 保存embedding"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"24 CLASSIFIED GP(08232)發布年度業績,股東應占虧損爲1534.6萬港元,同比...\n",
|
||||
"47 【公司盈警】稻草熊娱乐(02125)料2023年度盈转亏0.9-1.3亿元人民币.html\n",
|
||||
"28 FIT HON TENG(06088.HK)擬3月12日舉行董事會會議批准年度業績.html\n",
|
||||
"34 MI能源(01555.HK)盈警:預期2023年度淨虧損約1.47億至1.67億元.html\n",
|
||||
"23 BOSS直聘-W(02076.HK)3月12日舉行董事會會議審議及批准年度業績.html\n",
|
||||
"Name: text, dtype: object"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"simRank('财报', n=5)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "afe",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.13"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
|
@ -0,0 +1,31 @@
|
|||
import requests
|
||||
|
||||
def get_stock_news(page_num=1, page_size=10, story_start_time="2024-04-04 10:36:26.685", story_end_time="2024-04-11 10:36:26.685", timeout=10):
    """
    Fetch one page of stock news from the news service.

    Args:
        page_num (int): Page number, sent as the ``pageNum`` query parameter.
        page_size (int): Page size, sent as ``pageSize``.
        story_start_time (str): Start of the story time window, sent as
            ``storyStartTime``.
        story_end_time (str): End of the story time window, sent as
            ``storyEndTime``.
        timeout (float or tuple): Seconds handed to ``requests.get`` so a
            stalled server cannot block the caller indefinitely.

    Returns:
        The decoded response body (assumed to be JSON), or ``None`` when the
        request fails, the server answers with an error status, or the body
        cannot be decoded as JSON.
    """
    url = 'http://192.168.3.210:9205/api/news/ai/assist/stockNews'
    params = {
        "pageNum": page_num,
        "pageSize": page_size,
        "storyEndTime": story_end_time,
        "storyStartTime": story_start_time
    }
    headers = {"From-Source": "inner"}

    try:
        # Without an explicit timeout, requests waits forever on a silent peer.
        response = requests.get(url, headers=headers, params=params, timeout=timeout)
        response.raise_for_status()  # raises HTTPError for 4xx/5xx responses
        return response.json()  # parsed JSON payload
    except (requests.RequestException, ValueError) as e:
        # ValueError covers JSON decoding failures on requests versions whose
        # decode error is not a RequestException subclass.
        print(f"请求股票新闻时发生错误: {e}")
        return None
|
File diff suppressed because one or more lines are too long
|
@ -0,0 +1,95 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from utils import *"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"654\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# import os\n",
|
||||
"\n",
|
||||
"# directory = '../text'\n",
|
||||
"\n",
|
||||
"# file_list = []\n",
|
||||
"# for filename in os.listdir(directory):\n",
|
||||
"# if os.path.isfile(os.path.join(directory, filename)):\n",
|
||||
"# file_list.append(filename)\n",
|
||||
"\n",
|
||||
"# # 打印文件名列表\n",
|
||||
"# print(len(file_list))\n",
|
||||
"\n",
|
||||
"# for i in range(0, 10):\n",
|
||||
"# with open(os.path.join(directory, file_list[i]), 'r', encoding='utf-8') as file:\n",
|
||||
"# html_content = file.read()\n",
|
||||
"# html_extracted = extract_text_from_html(html_content)\n",
|
||||
"# text = convert_to_simplified(html_extracted)\n",
|
||||
"# text = rmCharacters(text, punct=True, stop=True)\n",
|
||||
"# vector = embeddingfunc(client, text, model=\"text-embedding-3-small\", dimensions=1536)\n",
|
||||
"# save2text(file_list[i], vector)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"6 3連板四川金頂(600678.SH):公司無任何形成收入的氫能源相關産品.html\n",
|
||||
"7 4連板 天永智能(603895.SH):無任何形成收入的氫能源産品 現有産品與氫能源無任何關...\n",
|
||||
"1 由于库存充足 全球液化天然气-亚洲现货液化天然气价格持平.html\n",
|
||||
"3 2024年“全球視野·下注中國”十大核心ETF最新解讀(2月月報).html\n",
|
||||
"9 5億補貼推動以舊換新,蘇寧易購發佈2024年星雲計劃.html\n",
|
||||
"Name: text, dtype: object"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"simRank('能源领域最近有什么新闻吗?', n=5)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "afe",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.13"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
|
@ -0,0 +1,93 @@
|
|||
import os
|
||||
from openai import OpenAI
|
||||
from bs4 import BeautifulSoup
|
||||
import opencc
|
||||
import re
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import json
|
||||
from numpy.linalg import norm
|
||||
|
||||
|
||||
|
||||
# The API key is read from the OPENAI_API_KEY environment variable instead of
# being committed to the repository; any key that was previously checked in
# should be treated as leaked and revoked.
# The 30-second request timeout is passed to the client constructor; assigning
# an attribute on the OpenAI class does not configure the client.
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"), timeout=30)
|
||||
|
||||
# Character class of punctuation that rmCharacters() strips from text.
# NOTE(review): as written here, the unescaped ASCII double quote after "。"
# ends the raw string early and the rest of the line becomes a comment —
# presumably the original used full-width punctuation; verify this line against
# the upstream HarvestText pattern referenced at the bottom of this file.
punct_regex = r"[、!?。"#$%&'()*+,-/:;<=>@[\]^_`{|}~⦅⦆「」、、〃》「」『』【】〔〕〖〗〘〙〚〛〜〝〞〟〰〾〿–—‘’‛“”„‟…‧﹏!\"\#$%&\'\(\)\*\+,-\./:;<=>?@\[\\\]\^_`{\|}~]"
|
||||
|
||||
# Load the stop-word list (one entry per line). The context manager closes the
# file even if reading fails.
with open("./stopwords_full.txt", "r", encoding='utf-8') as f:
    stopwords_full = f.read()

stopwords_full = stopwords_full.strip().split('\n')
|
||||
|
||||
# Make sure both CSV stores exist; a missing one is created with only its
# header row.
text_file_path = './text.csv'
question_file_path = './question.csv'

for _csv_path, _csv_columns in (
    (text_file_path, ['text', 'embedding']),
    (question_file_path, ['question', 'embedding']),
):
    if not os.path.isfile(_csv_path):
        pd.DataFrame(columns=_csv_columns).to_csv(_csv_path, index=False)
|
||||
|
||||
|
||||
def embeddingfunc(client, text, model="text-embedding-3-small", dimensions=1536):
    """
    Request an embedding for ``text`` and return the first result's vector.

    Args:
        client: Object exposing ``embeddings.create(input=..., model=..., dimensions=...)``.
        text: Input passed through unchanged as ``input``.
        model: Embedding model name.
        dimensions: Requested embedding size.

    Returns:
        The ``embedding`` attribute of the first entry in the response's ``data``.
    """
    request = {
        "input": text,
        "model": model,
        "dimensions": dimensions,
    }
    result = client.embeddings.create(**request)
    first_entry = result.data[0]
    return first_entry.embedding
|
||||
|
||||
|
||||
def extract_text_from_html(html):
    """
    Return the visible text of an HTML document, one fragment per line.

    ``<script>`` and ``<style>`` elements are removed first. The remaining
    text is split into lines, each line is stripped and split on a single
    space, and the non-empty stripped fragments are joined with newlines.
    """
    document = BeautifulSoup(html, 'html.parser')
    for tag in document(["script", "style"]):
        tag.decompose()

    fragments = []
    for raw_line in document.get_text().splitlines():
        for piece in raw_line.strip().split(" "):
            piece = piece.strip()
            if piece:
                fragments.append(piece)
    return '\n'.join(fragments)
|
||||
|
||||
|
||||
|
||||
def convert_to_simplified(text):
    """
    Convert ``text`` with an OpenCC converter built from the 'hk2s.json'
    configuration and return the converted string.

    A new converter is constructed on every call.
    """
    return opencc.OpenCC('hk2s.json').convert(text)
|
||||
|
||||
def rmCharacters(text, punct=True, stop=True):
    """
    Strip punctuation and/or stop words from ``text``.

    Args:
        text (str): Input text.
        punct (bool): When true, remove every character matched by the
            module-level ``punct_regex``.
        stop (bool): When true, remove every entry of the module-level
            ``stopwords_full`` list that appears between regex word
            boundaries (``\\b``).

    Returns:
        str: The cleaned text. With both flags false the input is returned
        unchanged; previously any call that disabled a flag raised
        ``UnboundLocalError``.
    """
    cleaned = text
    if punct:
        cleaned = re.sub(punct_regex, '', cleaned)
    if stop:
        # Escape each stop word so it is matched literally inside the alternation.
        pattern = r'\b(?:{})\b'.format('|'.join(map(re.escape, stopwords_full)))
        cleaned = re.sub(pattern, '', cleaned)
    return cleaned
|
||||
|
||||
def save2question(question, embedding):  # question is a string
    """
    Append ``question`` and its ``embedding`` to the question CSV.

    The CSV at ``question_file_path`` is re-read on every call; nothing is
    written when the question is already present in its 'question' column.
    """
    stored = pd.read_csv(question_file_path)
    if question in stored['question'].to_list():
        return
    new_row = pd.DataFrame({"question": [question], "embedding": [embedding]})
    combined = pd.concat([stored, new_row], axis=0)
    combined.to_csv(question_file_path, index=False)
|
||||
|
||||
def save2text(text, embedding):  # text is a file path
    """
    Append ``text`` and its ``embedding`` to the text CSV.

    The CSV at ``text_file_path`` is re-read on every call; nothing is
    written when ``text`` is already present in its 'text' column.
    """
    stored = pd.read_csv(text_file_path)
    if text in stored['text'].to_list():
        return
    new_row = pd.DataFrame({"text": [text], "embedding": [embedding]})
    combined = pd.concat([stored, new_row], axis=0)
    combined.to_csv(text_file_path, index=False)
|
||||
|
||||
def simRank(question, n=3):
    """
    Rank the stored texts by cosine similarity to ``question``.

    The question is converted with ``convert_to_simplified``, cleaned with
    ``rmCharacters``, embedded, and recorded through ``save2question``. Every
    row of the text CSV is then scored against the question embedding.

    Args:
        question (str): Query text.
        n (int): Number of top-scoring rows to return.

    Returns:
        pandas.Series: The 'text' values of the ``n`` highest-scoring rows,
        most similar first.
    """
    question = convert_to_simplified(question)
    question = rmCharacters(question, punct=True, stop=True)
    vector = embeddingfunc(client, question, model="text-embedding-3-small", dimensions=1536)
    save2question(question, vector)

    # Read from the same path save2text() writes to, not a duplicated literal.
    textVector = pd.read_csv(text_file_path)
    B = np.array(vector)
    norm_B = norm(B)  # constant across rows, so computed once
    # Embeddings are stored in the CSV as JSON-style list strings.
    textVector['embedding'] = textVector['embedding'].apply(lambda x: np.array(json.loads(x)))
    textVector['sim'] = textVector['embedding'].apply(lambda x: np.dot(x, B) / (norm(x) * norm_B))
    return textVector.sort_values(by=['sim'], ascending=False).head(n)['text']
|
||||
|
||||
|
||||
# https://openai.com/blog/new-embedding-models-and-api-updates?ref=upstract.com
|
||||
# special-character removal (https://github.com/blmoistawinde/HarvestText/tree/master)
|
||||
# stop words (https://github.com/CharyHong/Stopwords/tree/main)
|
|
@ -0,0 +1,429 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from utils import *"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"654\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"directory = '../text'\n",
|
||||
"\n",
|
||||
"file_list = []\n",
|
||||
"for filename in os.listdir(directory):\n",
|
||||
" if os.path.isfile(os.path.join(directory, filename)):\n",
|
||||
" file_list.append(filename)\n",
|
||||
"\n",
|
||||
"# all the news titles\n",
|
||||
"print(len(file_list))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"0\n",
|
||||
"1\n",
|
||||
"2\n",
|
||||
"3\n",
|
||||
"4\n",
|
||||
"5\n",
|
||||
"6\n",
|
||||
"7\n",
|
||||
"8\n",
|
||||
"9\n",
|
||||
"10\n",
|
||||
"11\n",
|
||||
"12\n",
|
||||
"13\n",
|
||||
"14\n",
|
||||
"15\n",
|
||||
"16\n",
|
||||
"17\n",
|
||||
"18\n",
|
||||
"19\n",
|
||||
"20\n",
|
||||
"21\n",
|
||||
"22\n",
|
||||
"23\n",
|
||||
"24\n",
|
||||
"25\n",
|
||||
"26\n",
|
||||
"27\n",
|
||||
"28\n",
|
||||
"29\n",
|
||||
"30\n",
|
||||
"31\n",
|
||||
"32\n",
|
||||
"33\n",
|
||||
"34\n",
|
||||
"35\n",
|
||||
"36\n",
|
||||
"37\n",
|
||||
"38\n",
|
||||
"39\n",
|
||||
"40\n",
|
||||
"41\n",
|
||||
"42\n",
|
||||
"43\n",
|
||||
"44\n",
|
||||
"45\n",
|
||||
"46\n",
|
||||
"47\n",
|
||||
"48\n",
|
||||
"49\n",
|
||||
"50\n",
|
||||
"51\n",
|
||||
"52\n",
|
||||
"53\n",
|
||||
"54\n",
|
||||
"55\n",
|
||||
"56\n",
|
||||
"57\n",
|
||||
"58\n",
|
||||
"59\n",
|
||||
"60\n",
|
||||
"61\n",
|
||||
"62\n",
|
||||
"63\n",
|
||||
"64\n",
|
||||
"65\n",
|
||||
"66\n",
|
||||
"67\n",
|
||||
"68\n",
|
||||
"69\n",
|
||||
"70\n",
|
||||
"71\n",
|
||||
"72\n",
|
||||
"73\n",
|
||||
"74\n",
|
||||
"75\n",
|
||||
"76\n",
|
||||
"77\n",
|
||||
"78\n",
|
||||
"79\n",
|
||||
"80\n",
|
||||
"81\n",
|
||||
"82\n",
|
||||
"83\n",
|
||||
"84\n",
|
||||
"85\n",
|
||||
"86\n",
|
||||
"87\n",
|
||||
"88\n",
|
||||
"89\n",
|
||||
"90\n",
|
||||
"91\n",
|
||||
"92\n",
|
||||
"93\n",
|
||||
"94\n",
|
||||
"95\n",
|
||||
"96\n",
|
||||
"97\n",
|
||||
"98\n",
|
||||
"99\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"for i in range(0, 100): # first 100 news title embeddings\n",
|
||||
" title_extracted = file_list[i].replace(\".html\", \"\")\n",
|
||||
" text = convert_to_simplified(title_extracted) # change to simplified chinese\n",
|
||||
" text = rmCharacters(text, punct=True, stop=True) # remove stopping words\n",
|
||||
" vector = embeddingfunc(client, text, model=\"text-embedding-3-small\", dimensions=1536) # get embedding\n",
|
||||
" save2text(file_list[i], vector) # 保存embedding\n",
|
||||
" print(i)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"386 朸浚國際(01355.HK):2023年淨虧損3817.8萬港元.html\n",
|
||||
"130 云音乐(09899)2023年度扭亏为盈7.34亿元人民币.html\n",
|
||||
"96 中國環境資源(01130.HK)中期淨虧損4609.1萬港元.html\n",
|
||||
"81 中国环境资源(01130)发布中期业绩 股东应占亏损4609.1万港元 同比扩大1517.8...\n",
|
||||
"95 中國環境資源(01130)發布中期業績 股東應占虧損4609.1萬港元 同比擴大1517.8...\n",
|
||||
"Name: text, dtype: object"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"simRank('亏损', n=5)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"股票新闻获取成功:\n",
|
||||
"{'code': 200, 'msg': 'success', 'data': [{'newsId': '752312301192781824', 'headLine': 'A股異動丨新瀚新材跌超7% Q4營收淨利均下滑', 'storyDateTime': '2024-03-22 11:06:29', 'source': '格隆匯', 'viewingMethod': 1, 'newsLink': 'https://www.gelonghui.com/news/4714175', 'lang': 'tc', 'viewsNumber': 0, 'heatIndex': 9, 'prosConsIndex': 9, 'newsSummary': None, 'stockRealTimeInfoList': None, 'influence': None, 'hasNewsSummary': False, 'hasTrendAnalysis': False}, {'newsId': '752312300106457088', 'headLine': '隔夜shibor报1.7720% 上涨0.10个基点', 'storyDateTime': '2024-03-22 11:06:22', 'source': '金吾財訊', 'viewingMethod': 1, 'newsLink': None, 'lang': 'zh', 'viewsNumber': 0, 'heatIndex': 7, 'prosConsIndex': 7, 'newsSummary': None, 'stockRealTimeInfoList': None, 'influence': None, 'hasNewsSummary': False, 'hasTrendAnalysis': False}, {'newsId': '752312300106457089', 'headLine': '【券商聚焦】中信里昂:拼多多(PDD)去年国内外电商市场份额持续增长 予目标价180美元', 'storyDateTime': '2024-03-22 11:06:15', 'source': '金吾財訊', 'viewingMethod': 1, 'newsLink': None, 'lang': 'zh', 'viewsNumber': 0, 'heatIndex': 7, 'prosConsIndex': 7, 'newsSummary': None, 'stockRealTimeInfoList': None, 'influence': None, 'hasNewsSummary': False, 'hasTrendAnalysis': False}, {'newsId': '752312305198342146', 'headLine': '中信銀行副行長鬍罡:整體資產質量趨勢持續向好', 'storyDateTime': '2024-03-22 11:06:13', 'source': '格隆匯', 'viewingMethod': 1, 'newsLink': 'https://www.gelonghui.com/live/1456563', 'lang': 'tc', 'viewsNumber': 0, 'heatIndex': 0, 'prosConsIndex': None, 'newsSummary': '中信銀行今日召開2023年度業績發佈會,中信銀行副行長、風險總監胡罡表示,我行的整體資產質量趨勢持續向好。首先從數據上來看,近幾年主要的風險區指標持續向好,與同業比也是持續向好,不斷的往上走。另外,從資產質量的結果來看,一個是2.37萬億的主要是投向國債、三A的信用債等方面,風險較低;另一個方面是1.73萬億的理財管理資產,這一塊資產比較安全。', 'stockRealTimeInfoList': None, 'influence': None, 'hasNewsSummary': True, 'hasTrendAnalysis': False}, {'newsId': '752312304443367424', 'headLine': '生意社:近期碳酸锂價格穩中上行', 'storyDateTime': '2024-03-22 11:06:01', 'source': '智通財經', 'viewingMethod': 1, 'newsLink': 'http://www.zhitongcaijing.com/content/detail/1090316.html', 'lang': 'tc', 'viewsNumber': 0, 
'heatIndex': 0, 'prosConsIndex': None, 'newsSummary': None, 'stockRealTimeInfoList': None, 'influence': None, 'hasNewsSummary': False, 'hasTrendAnalysis': False}]}\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>newsId</th>\n",
|
||||
" <th>headLine</th>\n",
|
||||
" <th>storyDateTime</th>\n",
|
||||
" <th>source</th>\n",
|
||||
" <th>viewingMethod</th>\n",
|
||||
" <th>newsLink</th>\n",
|
||||
" <th>lang</th>\n",
|
||||
" <th>viewsNumber</th>\n",
|
||||
" <th>heatIndex</th>\n",
|
||||
" <th>prosConsIndex</th>\n",
|
||||
" <th>newsSummary</th>\n",
|
||||
" <th>stockRealTimeInfoList</th>\n",
|
||||
" <th>influence</th>\n",
|
||||
" <th>hasNewsSummary</th>\n",
|
||||
" <th>hasTrendAnalysis</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>752312301192781824</td>\n",
|
||||
" <td>A股異動丨新瀚新材跌超7% Q4營收淨利均下滑</td>\n",
|
||||
" <td>2024-03-22 11:06:29</td>\n",
|
||||
" <td>格隆匯</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>https://www.gelonghui.com/news/4714175</td>\n",
|
||||
" <td>tc</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>9</td>\n",
|
||||
" <td>9.0</td>\n",
|
||||
" <td>None</td>\n",
|
||||
" <td>None</td>\n",
|
||||
" <td>None</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>752312300106457088</td>\n",
|
||||
" <td>隔夜shibor报1.7720% 上涨0.10个基点</td>\n",
|
||||
" <td>2024-03-22 11:06:22</td>\n",
|
||||
" <td>金吾財訊</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>None</td>\n",
|
||||
" <td>zh</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>7</td>\n",
|
||||
" <td>7.0</td>\n",
|
||||
" <td>None</td>\n",
|
||||
" <td>None</td>\n",
|
||||
" <td>None</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>752312300106457089</td>\n",
|
||||
" <td>【券商聚焦】中信里昂:拼多多(PDD)去年国内外电商市场份额持续增长 予目标价180美元</td>\n",
|
||||
" <td>2024-03-22 11:06:15</td>\n",
|
||||
" <td>金吾財訊</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>None</td>\n",
|
||||
" <td>zh</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>7</td>\n",
|
||||
" <td>7.0</td>\n",
|
||||
" <td>None</td>\n",
|
||||
" <td>None</td>\n",
|
||||
" <td>None</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>752312305198342146</td>\n",
|
||||
" <td>中信銀行副行長鬍罡:整體資產質量趨勢持續向好</td>\n",
|
||||
" <td>2024-03-22 11:06:13</td>\n",
|
||||
" <td>格隆匯</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>https://www.gelonghui.com/live/1456563</td>\n",
|
||||
" <td>tc</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>中信銀行今日召開2023年度業績發佈會,中信銀行副行長、風險總監胡罡表示,我行的整體資產質量...</td>\n",
|
||||
" <td>None</td>\n",
|
||||
" <td>None</td>\n",
|
||||
" <td>True</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>752312304443367424</td>\n",
|
||||
" <td>生意社:近期碳酸锂價格穩中上行</td>\n",
|
||||
" <td>2024-03-22 11:06:01</td>\n",
|
||||
" <td>智通財經</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>http://www.zhitongcaijing.com/content/detail/1...</td>\n",
|
||||
" <td>tc</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>None</td>\n",
|
||||
" <td>None</td>\n",
|
||||
" <td>None</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" newsId headLine \\\n",
|
||||
"0 752312301192781824 A股異動丨新瀚新材跌超7% Q4營收淨利均下滑 \n",
|
||||
"1 752312300106457088 隔夜shibor报1.7720% 上涨0.10个基点 \n",
|
||||
"2 752312300106457089 【券商聚焦】中信里昂:拼多多(PDD)去年国内外电商市场份额持续增长 予目标价180美元 \n",
|
||||
"3 752312305198342146 中信銀行副行長鬍罡:整體資產質量趨勢持續向好 \n",
|
||||
"4 752312304443367424 生意社:近期碳酸锂價格穩中上行 \n",
|
||||
"\n",
|
||||
" storyDateTime source viewingMethod \\\n",
|
||||
"0 2024-03-22 11:06:29 格隆匯 1 \n",
|
||||
"1 2024-03-22 11:06:22 金吾財訊 1 \n",
|
||||
"2 2024-03-22 11:06:15 金吾財訊 1 \n",
|
||||
"3 2024-03-22 11:06:13 格隆匯 1 \n",
|
||||
"4 2024-03-22 11:06:01 智通財經 1 \n",
|
||||
"\n",
|
||||
" newsLink lang viewsNumber \\\n",
|
||||
"0 https://www.gelonghui.com/news/4714175 tc 0 \n",
|
||||
"1 None zh 0 \n",
|
||||
"2 None zh 0 \n",
|
||||
"3 https://www.gelonghui.com/live/1456563 tc 0 \n",
|
||||
"4 http://www.zhitongcaijing.com/content/detail/1... tc 0 \n",
|
||||
"\n",
|
||||
" heatIndex prosConsIndex \\\n",
|
||||
"0 9 9.0 \n",
|
||||
"1 7 7.0 \n",
|
||||
"2 7 7.0 \n",
|
||||
"3 0 NaN \n",
|
||||
"4 0 NaN \n",
|
||||
"\n",
|
||||
" newsSummary stockRealTimeInfoList \\\n",
|
||||
"0 None None \n",
|
||||
"1 None None \n",
|
||||
"2 None None \n",
|
||||
"3 中信銀行今日召開2023年度業績發佈會,中信銀行副行長、風險總監胡罡表示,我行的整體資產質量... None \n",
|
||||
"4 None None \n",
|
||||
"\n",
|
||||
" influence hasNewsSummary hasTrendAnalysis \n",
|
||||
"0 None False False \n",
|
||||
"1 None False False \n",
|
||||
"2 None False False \n",
|
||||
"3 None True False \n",
|
||||
"4 None False False "
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from http_requester import get_stock_news\n",
|
||||
"import pandas as pd\n",
|
||||
"# 调用函数获取股票新闻\n",
|
||||
"result = get_stock_news(page_num=1, page_size=5)\n",
|
||||
"\n",
|
||||
"if result is not None:\n",
|
||||
" print(\"股票新闻获取成功:\")\n",
|
||||
" print(result)\n",
|
||||
"else:\n",
|
||||
" print(\"股票新闻获取失败。\")\n",
|
||||
"data = result['data']\n",
|
||||
"\n",
|
||||
"# 将数据转换为DataFrame\n",
|
||||
"df = pd.json_normalize(data)\n",
|
||||
"\n",
|
||||
"df "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.2"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
Binary file not shown.
Binary file not shown.
|
@ -0,0 +1,31 @@
|
|||
import requests
|
||||
|
||||
def get_stock_news(page_num=1, page_size=10, story_start_time="2024-04-04 10:36:26.685", story_end_time="2024-04-11 10:36:26.685", timeout=10):
    """
    Fetch one page of stock news from the news service.

    Args:
        page_num (int): Page number, sent as the ``pageNum`` query parameter.
        page_size (int): Page size, sent as ``pageSize``.
        story_start_time (str): Start of the story time window, sent as
            ``storyStartTime``.
        story_end_time (str): End of the story time window, sent as
            ``storyEndTime``.
        timeout (float or tuple): Seconds handed to ``requests.get`` so a
            stalled server cannot block the caller indefinitely.

    Returns:
        The decoded response body (assumed to be JSON), or ``None`` when the
        request fails, the server answers with an error status, or the body
        cannot be decoded as JSON.
    """
    url = 'http://192.168.3.210:9205/api/news/ai/assist/stockNews'
    params = {
        "pageNum": page_num,
        "pageSize": page_size,
        "storyEndTime": story_end_time,
        "storyStartTime": story_start_time
    }
    headers = {"From-Source": "inner"}

    try:
        # Without an explicit timeout, requests waits forever on a silent peer.
        response = requests.get(url, headers=headers, params=params, timeout=timeout)
        response.raise_for_status()  # raises HTTPError for 4xx/5xx responses
        return response.json()  # parsed JSON payload
    except (requests.RequestException, ValueError) as e:
        # ValueError covers JSON decoding failures on requests versions whose
        # decode error is not a RequestException subclass.
        print(f"请求股票新闻时发生错误: {e}")
        return None
|
File diff suppressed because one or more lines are too long
File diff suppressed because it is too large
Load Diff
File diff suppressed because one or more lines are too long
|
@ -0,0 +1,94 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from utils import *"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# import os\n",
|
||||
"\n",
|
||||
"# directory = '../text'\n",
|
||||
"\n",
|
||||
"# file_list = []\n",
|
||||
"# for filename in os.listdir(directory):\n",
|
||||
"# if os.path.isfile(os.path.join(directory, filename)):\n",
|
||||
"# file_list.append(filename)\n",
|
||||
"\n",
|
||||
"# # 打印文件名列表\n",
|
||||
"# print(len(file_list))\n",
|
||||
"\n",
|
||||
"# for i in range(0, 10):\n",
|
||||
"# with open(os.path.join(directory, file_list[i]), 'r', encoding='utf-8') as file:\n",
|
||||
"# html_content = file.read()\n",
|
||||
"# html_extracted = extract_text_from_html(html_content)\n",
|
||||
"# text = convert_to_simplified(html_extracted)\n",
|
||||
"# text = rmCharacters(text, punct=True, stop=True)\n",
|
||||
"# vector = embeddingfunc(client, text, model=\"text-embedding-3-small\", dimensions=1536)\n",
|
||||
"# save2text(file_list[i], vector)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"34 MI能源(01555.HK)盈警:預期2023年度淨虧損約1.47億至1.67億元.html\n",
|
||||
"6 3連板四川金頂(600678.SH):公司無任何形成收入的氫能源相關産品.html\n",
|
||||
"28 FIT HON TENG(06088.HK)擬3月12日舉行董事會會議批准年度業績.html\n",
|
||||
"47 【公司盈警】稻草熊娱乐(02125)料2023年度盈转亏0.9-1.3亿元人民币.html\n",
|
||||
"3 2024年“全球視野·下注中國”十大核心ETF最新解讀(2月月報).html\n",
|
||||
"Name: text, dtype: object"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"simRank('能源领域的财报?', n=5)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.2"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
|
@ -0,0 +1,93 @@
|
|||
import os
|
||||
from openai import OpenAI
|
||||
from bs4 import BeautifulSoup
|
||||
import opencc
|
||||
import re
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import json
|
||||
from numpy.linalg import norm
|
||||
|
||||
|
||||
|
||||
# The API key is read from the OPENAI_API_KEY environment variable instead of
# being committed to the repository; any key that was previously checked in
# should be treated as leaked and revoked.
# The 30-second request timeout is passed to the client constructor; assigning
# an attribute on the OpenAI class does not configure the client.
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"), timeout=30)
|
||||
|
||||
# Character class of punctuation that rmCharacters() strips from text.
# NOTE(review): as written here, the unescaped ASCII double quote after "。"
# ends the raw string early and the rest of the line becomes a comment —
# presumably the original used full-width punctuation; verify this line against
# the upstream HarvestText pattern referenced at the bottom of this file.
punct_regex = r"[、!?。"#$%&'()*+,-/:;<=>@[\]^_`{|}~⦅⦆「」、、〃》「」『』【】〔〕〖〗〘〙〚〛〜〝〞〟〰〾〿–—‘’‛“”„‟…‧﹏!\"\#$%&\'\(\)\*\+,-\./:;<=>?@\[\\\]\^_`{\|}~]"
|
||||
|
||||
# Load the stop-word list (one entry per line). The context manager closes the
# file even if reading fails.
with open("./stopwords_full.txt", "r", encoding='utf-8') as f:
    stopwords_full = f.read()

stopwords_full = stopwords_full.strip().split('\n')
|
||||
|
||||
# Make sure both CSV stores exist; a missing one is created with only its
# header row.
text_file_path = './text.csv'
question_file_path = './question.csv'

for _csv_path, _csv_columns in (
    (text_file_path, ['text', 'embedding']),
    (question_file_path, ['question', 'embedding']),
):
    if not os.path.isfile(_csv_path):
        pd.DataFrame(columns=_csv_columns).to_csv(_csv_path, index=False)
|
||||
|
||||
|
||||
def embeddingfunc(client, text, model="text-embedding-3-small", dimensions=1536):
    """
    Request an embedding for ``text`` and return the first result's vector.

    Args:
        client: Object exposing ``embeddings.create(input=..., model=..., dimensions=...)``.
        text: Input passed through unchanged as ``input``.
        model: Embedding model name.
        dimensions: Requested embedding size.

    Returns:
        The ``embedding`` attribute of the first entry in the response's ``data``.
    """
    request = {
        "input": text,
        "model": model,
        "dimensions": dimensions,
    }
    result = client.embeddings.create(**request)
    first_entry = result.data[0]
    return first_entry.embedding
|
||||
|
||||
|
||||
def extract_text_from_html(html):
    """
    Return the visible text of an HTML document, one fragment per line.

    ``<script>`` and ``<style>`` elements are removed first. The remaining
    text is split into lines, each line is stripped and split on a single
    space, and the non-empty stripped fragments are joined with newlines.
    """
    document = BeautifulSoup(html, 'html.parser')
    for tag in document(["script", "style"]):
        tag.decompose()

    fragments = []
    for raw_line in document.get_text().splitlines():
        for piece in raw_line.strip().split(" "):
            piece = piece.strip()
            if piece:
                fragments.append(piece)
    return '\n'.join(fragments)
|
||||
|
||||
|
||||
|
||||
def convert_to_simplified(text):
    """
    Convert ``text`` with an OpenCC converter built from the 'hk2s.json'
    configuration and return the converted string.

    A new converter is constructed on every call.
    """
    return opencc.OpenCC('hk2s.json').convert(text)
|
||||
|
||||
def rmCharacters(text, punct=True, stop=True):
    """
    Strip punctuation and/or stop words from ``text``.

    Args:
        text (str): Input text.
        punct (bool): When true, remove every character matched by the
            module-level ``punct_regex``.
        stop (bool): When true, remove every entry of the module-level
            ``stopwords_full`` list that appears between regex word
            boundaries (``\\b``).

    Returns:
        str: The cleaned text. With both flags false the input is returned
        unchanged; previously any call that disabled a flag raised
        ``UnboundLocalError``.
    """
    cleaned = text
    if punct:
        cleaned = re.sub(punct_regex, '', cleaned)
    if stop:
        # Escape each stop word so it is matched literally inside the alternation.
        pattern = r'\b(?:{})\b'.format('|'.join(map(re.escape, stopwords_full)))
        cleaned = re.sub(pattern, '', cleaned)
    return cleaned
|
||||
|
||||
def save2question(question, embedding):  # question is a string
    """
    Append ``question`` and its ``embedding`` to the question CSV.

    The CSV at ``question_file_path`` is re-read on every call; nothing is
    written when the question is already present in its 'question' column.
    """
    stored = pd.read_csv(question_file_path)
    if question in stored['question'].to_list():
        return
    new_row = pd.DataFrame({"question": [question], "embedding": [embedding]})
    combined = pd.concat([stored, new_row], axis=0)
    combined.to_csv(question_file_path, index=False)
|
||||
|
||||
def save2text(text, embedding):  # text is a file path
    """
    Append ``text`` and its ``embedding`` to the text CSV.

    The CSV at ``text_file_path`` is re-read on every call; nothing is
    written when ``text`` is already present in its 'text' column.
    """
    stored = pd.read_csv(text_file_path)
    if text in stored['text'].to_list():
        return
    new_row = pd.DataFrame({"text": [text], "embedding": [embedding]})
    combined = pd.concat([stored, new_row], axis=0)
    combined.to_csv(text_file_path, index=False)
|
||||
|
||||
def simRank(question, n=3):
    """
    Rank the stored texts by cosine similarity to ``question``.

    The question is converted with ``convert_to_simplified``, cleaned with
    ``rmCharacters``, embedded, and recorded through ``save2question``. Every
    row of the text CSV is then scored against the question embedding.

    Args:
        question (str): Query text.
        n (int): Number of top-scoring rows to return.

    Returns:
        pandas.Series: The 'text' values of the ``n`` highest-scoring rows,
        most similar first.
    """
    question = convert_to_simplified(question)
    question = rmCharacters(question, punct=True, stop=True)
    vector = embeddingfunc(client, question, model="text-embedding-3-small", dimensions=1536)
    save2question(question, vector)

    # Read from the same path save2text() writes to, not a duplicated literal.
    textVector = pd.read_csv(text_file_path)
    B = np.array(vector)
    norm_B = norm(B)  # constant across rows, so computed once
    # Embeddings are stored in the CSV as JSON-style list strings.
    textVector['embedding'] = textVector['embedding'].apply(lambda x: np.array(json.loads(x)))
    textVector['sim'] = textVector['embedding'].apply(lambda x: np.dot(x, B) / (norm(x) * norm_B))
    return textVector.sort_values(by=['sim'], ascending=False).head(n)['text']
|
||||
|
||||
|
||||
# https://openai.com/blog/new-embedding-models-and-api-updates?ref=upstract.com
|
||||
# special-character removal (https://github.com/blmoistawinde/HarvestText/tree/master)
|
||||
# stop words (https://github.com/CharyHong/Stopwords/tree/main)
|
Loading…
Reference in New Issue